ProtonDataLabs committed on
Commit
136ada0
1 Parent(s): 71f9be1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +180 -273
app.py CHANGED
@@ -5,46 +5,92 @@ import matplotlib.pyplot as plt
5
  import seaborn as sns
6
  import matplotlib.dates as mdates
7
  import plotly.express as px
 
8
  import re
9
  from datetime import datetime, timedelta
10
  import warnings
11
- # Load the data
12
- df = pd.read_csv(r"fy21-24.csv",
13
- dtype={"FyWeek": "string",
14
- "Fy": "category",
15
- "Chaincode": "category",
16
- "Store": "category",
17
- "Address": "string",
18
- "Zipcode": "float",
19
- "City": "category",
20
- "State": "category",
21
- "Containercode": "category",
22
- "Itemtype": "category",
23
- "SalesVolume":"float",
24
- "UnitPrice":"float",
25
- "Sales":"float"})
26
-
27
- # Convert columns
28
- df["Zipcode"] = df["Zipcode"].convert_dtypes()
29
- df["SalesVolume"] = df["SalesVolume"].convert_dtypes()
30
-
31
- # Title for the app
32
- st.title('Sales Data Dashboard')
33
-
34
- # Initialize session state for storing which card was clicked and item type
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
  if 'active_card' not in st.session_state:
36
  st.session_state['active_card'] = None
37
  if 'selected_item_type' not in st.session_state:
38
  st.session_state['selected_item_type'] = 'CORE' # Set default to 'CORE'
39
- # Initialize session state for storing the selected state and feature
40
- if 'selected_state' not in st.session_state:
41
- st.session_state['selected_state'] = df['State'].unique()[0] # Default to the first state
42
  if 'selected_feature' not in st.session_state:
43
  st.session_state['selected_feature'] = 'Chaincode' # Default to 'Chain Code'
44
 
45
- # Two columns for the card buttons
46
- col1, col2, col3, col4 = st.columns(4)
47
-
48
  # Define buttons for plot categories, update session state when clicked
49
  with col1:
50
  if st.button("Sales Volume Trend for Item Category"):
@@ -54,67 +100,48 @@ with col2:
54
  if st.button("Sales Volume & Unit Price Correlation for Item Category and Container Code"):
55
  st.session_state['active_card'] = 'card2'
56
 
57
- with col3:
58
- if st.button("Price vs Sales Trend by Year"):
59
- st.session_state['active_card'] = 'card3'
 
 
60
 
61
- with col4:
62
- if st.button("Total Sales Volume by Price Band"):
63
- st.session_state['active_card'] = 'card4'
64
 
 
 
 
65
 
66
- ########################################### CARD #1 ####################################################
67
  if st.session_state['active_card'] == 'card1':
68
- # Create short fiscal week display
69
- df['Fiscal Year'] = df['FyWeek'].apply(lambda x: int(x.split(' ')[1]))
70
- df['Week Number'] = df['FyWeek'].apply(lambda x: int(x.split('Week ')[1]))
71
- df = df.sort_values(by=['Fiscal Year', 'Week Number'])
72
-
73
- # Reformat 'Fiscal Week' for display (e.g., 'FY21W51')
74
- df['Fiscal Week Short'] = df.apply(lambda x: f"FY{x['Fiscal Year']%100}W{x['Week Number']}", axis=1)
75
-
76
- # Ensure the short fiscal week column is treated as a categorical variable and sorted by the order of appearance
77
- df['Fiscal Week Short'] = pd.Categorical(df['Fiscal Week Short'], categories=df['Fiscal Week Short'].unique(), ordered=True)
78
- # Dropdown for selecting the state (using session_state)
79
- st.session_state['selected_state'] = st.selectbox('Select State', df['State'].unique(),
80
- index=list(df['State'].unique()).index(st.session_state['selected_state']))
81
-
82
- # Dropdown for selecting the feature for grouping (using session_state)
83
- st.session_state['selected_feature'] = st.selectbox('Select Feature for Grouping',
84
- ['Chaincode', 'Itemtype', 'FyWeek'],
85
- index=['Chaincode', 'Itemtype', 'FyWeek'].index(st.session_state['selected_feature']))
86
-
87
  # Filter the dataframe based on selected state
88
- filtered_df = df[df['State'] == st.session_state['selected_state']]
89
-
90
- # Plot based on user's selected feature
91
- if st.session_state['selected_feature'] == 'Itemtype':
92
- st.subheader(f'Sales Data for {st.session_state["selected_state"]} - Grouped by Item Type')
93
- group_data = filtered_df.groupby(['FyWeek', 'Itemtype'])['SalesVolume'].sum().reset_index()
94
- fig = px.bar(group_data, x='FyWeek', y='SalesVolume', color='Itemtype',
95
- title=f'Sales Volume over Fiscal Week in {st.session_state["selected_state"]} by Item Type',
96
- labels={'SalesVolume': 'Sales Volume'})
97
-
98
- elif st.session_state['selected_feature'] == 'Chaincode':
99
- st.subheader(f'Sales Data for {st.session_state["selected_state"]} - Grouped by Chain Code')
100
- group_data = filtered_df.groupby(['FyWeek', 'Chaincode'])['SalesVolume'].sum().reset_index()
101
- fig = px.bar(group_data, x='FyWeek', y='SalesVolume', color='Chaincode',
102
- title=f'Sales Volume over Fiscal Week in {st.session_state["selected_state"]} by Chain Code',
103
- labels={'SalesVolume': 'Sales Volume'})
104
-
105
- elif st.session_state['selected_feature'] == 'FyWeek':
106
- st.subheader(f'Sales Data for {st.session_state["selected_state"]} - Grouped by Fiscal Week')
107
- group_data = filtered_df.groupby(['FyWeek'])['SalesVolume'].sum().reset_index()
108
- fig = px.bar(group_data, x='FyWeek', y='SalesVolume',
109
- title=f'Sales Volume over Fiscal Week in {st.session_state["selected_state"]}',
110
- labels={'SalesVolume': 'Sales Volume'})
111
-
112
- # Display the interactive plot
113
  st.plotly_chart(fig)
 
114
  ##########################################################################################################
115
 
 
116
  ########################################### CARD #2 ####################################################
117
- # Card 2: Sales Volume & Unit Price Correlation plot for Item Category and Container Code
 
 
 
 
118
  if st.session_state['active_card'] == 'card2':
119
  # Dropdown to select item type (using session_state)
120
  st.session_state['selected_item_type'] = st.selectbox(
@@ -124,33 +151,18 @@ if st.session_state['active_card'] == 'card2':
124
  # Dropdown to select the grouping category (container code, chain code, or state)
125
  group_by_option = st.selectbox('Group by', ['Containercode', 'Chaincode', 'State'])
126
 
127
- st.subheader(f"Sales Volume & Unit Price Correlation for {group_by_option}")
128
-
129
- # Group the dataframe and prepare for plotting
130
- df = df.groupby(['FyWeek', 'Fy', 'Chaincode', 'Store', 'Address', 'Zipcode', 'City', 'State', 'Containercode', 'Itemtype'], observed=True).agg({
131
- 'SalesVolume': 'sum',
132
- 'UnitPrice': 'mean',
133
- 'Sales': 'sum'
134
- }).reset_index()
135
 
136
- # Function to extract date from fiscal week
137
- def dt_from_fy_week(fyweek):
138
- fy, w = re.findall(r'\d+', fyweek)
139
- week1_start = datetime.strptime("{}-08-01".format(int(fy) - 1), "%Y-%m-%d")
140
- return (week1_start + timedelta(weeks=int(w) - 1)).date()
141
 
142
- # Add columns for date and promo to data
143
- df['Dt'] = df['FyWeek'].apply(dt_from_fy_week)
144
  df['Dt'] = pd.to_datetime(df['Dt'], errors='coerce')
145
  df['Promo'] = np.where(df['Dt'].dt.month.astype(str).isin(['3', '4', '5', '6']), 'Promo', 'NoPromo')
146
  df["Promo"] = df["Promo"].astype("category")
147
-
148
- # Split FyWeek into fiscal year and week number
149
- df['Week'] = df['FyWeek'].str.split().str[-1].astype(int)
150
- df['Year'] = df['FyWeek'].str.split().str[1].astype(int)
151
 
152
- # Filter the dataframe based on the selected item type
153
- filtered_df = df[df['Itemtype'] == st.session_state['selected_item_type']]
154
 
155
  # Find the top 3 values based on total SalesVolume in the selected grouping category
156
  top_3_values = filtered_df.groupby(group_by_option, observed=True)['SalesVolume'].sum().nlargest(3).index
@@ -158,183 +170,78 @@ if st.session_state['active_card'] == 'card2':
158
  # Filter the data for only the top 3 values
159
  top_group_data = filtered_df[filtered_df[group_by_option].isin(top_3_values)]
160
 
161
- # Group by Year, Week, Dt, and the selected category and aggregate SalesVolume and UnitPrice
162
  agg_df = top_group_data.groupby([group_by_option, 'Year', 'Week', 'Dt'], observed=True).agg({
163
  'SalesVolume': 'sum',
164
  'UnitPrice': 'mean'
165
  }).reset_index()
166
 
167
- # Loop through the top 3 values and create separate plots
 
 
 
168
  for value in top_3_values:
169
  value_data = agg_df[agg_df[group_by_option] == value]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
170
 
171
- # Create a new figure for each group
172
- fig, (axd, axp) = plt.subplots(2, 1, figsize=(10, 6))
173
 
174
- # Plot SalesVolume
175
- sns.lineplot(data=value_data, x='Dt', y='SalesVolume', ax=axd)
176
- axd.set_title(f"SalesVolume - {value} ({group_by_option})")
177
- axd.grid(True, linestyle='--', color='gray', alpha=0.7)
178
 
179
- # Plot mean line for SalesVolume
180
- axd.axhline(value_data['SalesVolume'].mean(), ls="--", color="r")
181
- axd.xaxis.set_major_locator(mdates.MonthLocator(interval=2))
182
- axd.set_xticklabels([])
183
-
184
- # Plot UnitPrice
185
- sns.lineplot(data=value_data, x='Dt', y='UnitPrice', ax=axp, color='green', errorbar='sd')
186
- axp.set_title(f"UnitPrice - {value} ({group_by_option})")
187
- axp.grid(True, linestyle='--', color='gray', alpha=0.7)
188
-
189
- # Plot mean line for UnitPrice
190
- axp.axhline(value_data['UnitPrice'].mean(), ls="--", color="r")
191
- axp.xaxis.set_major_locator(mdates.MonthLocator(interval=2))
192
- axp.tick_params(axis='x', rotation=90)
193
-
194
- # Adjust layout for each figure
195
- plt.tight_layout()
196
-
197
- # Display the plot in Streamlit
198
- st.pyplot(fig)
199
- ###############################################################################################
200
-
201
- ########################################### CARD #3 ####################################################
202
- # Check which card was selected using session state
203
- if st.session_state['active_card'] == 'card3':
204
- # Dropdown for selecting the Item Type
205
- st.session_state['selected_item_type'] = st.selectbox('Select Item Type', df['Itemtype'].unique(),
206
- index=list(df['Itemtype'].unique()).index(st.session_state['selected_item_type']))
207
- df = df.groupby(['FyWeek','Fy','Chaincode','Store','Address','Zipcode','City','State','Containercode','Itemtype'],observed=True).agg({
208
- 'SalesVolume': 'sum',
209
- 'UnitPrice':'mean',
210
- 'Sales': 'sum'}).reset_index()
211
- # add promo and date columns to data
212
- def dt_from_fy_week(fyweek):
213
-
214
- fy, w = re.findall(r'\d+', fyweek)
215
-
216
- week1_start = datetime.strptime("{}-08-01".format(int(fy)-1), "%Y-%m-%d")
217
-
218
- return (week1_start + timedelta(weeks=int(w)-1)).date()
219
-
220
- df['Dt'] = df['FyWeek'].apply(dt_from_fy_week)
221
- df['Dt'] = pd.to_datetime(df['Dt'], errors='coerce')
222
- df['Promo'] = np.where(df['Dt'].dt.month.astype(str).isin(['3','4','5','6']) , 'Promo', 'NoPromo')
223
- df["Promo"] = df["Promo"].astype("category")
224
- # Split FyWeek into fiscal year and week number
225
- df['Week'] = df['FyWeek'].str.split().str[-1].astype(int)
226
- # df_21['Year'] = df_21['Fy'].str.extract(r'(\d+)').astype(int)
227
- df['Year'] = df['FyWeek'].str.split().str[1].astype(int)
228
-
229
- # Define the fiscal years
230
- years = ["FY 2021", "FY 2022", "FY 2023", "FY 2024"]
231
-
232
- # Set up a 2x2 grid of subplots for the four years
233
- fig, axs = plt.subplots(2, 2, figsize=(12, 8))
234
-
235
- # Loop through each year and create a plot in the grid
236
- for i, fy in enumerate(years):
237
- ax = axs.flat[i]
238
-
239
- # Plot Promo data
240
- sns.regplot(data=df[(df["Itemtype"] == st.session_state['selected_item_type']) & (df["Fy"] == fy) & (df["Promo"] == "Promo")],
241
- x="UnitPrice", y="SalesVolume", lowess=True, ci=None, marker='.', line_kws=dict(color="r"), ax=ax, label="Promo")
242
-
243
- # Plot NoPromo data
244
- sns.regplot(data=df[(df["Itemtype"] == st.session_state['selected_item_type']) & (df["Fy"] == fy) & (df["Promo"] == "NoPromo")],
245
- x="UnitPrice", y="SalesVolume", lowess=True, ci=None, marker='x', line_kws=dict(color="g"), ax=ax, label="NoPromo")
246
-
247
- # Set the title of each subplot
248
- ax.set_title(f"{st.session_state['selected_item_type']} - {fy}")
249
- ax.legend(loc="best")
250
-
251
- # Set the overall title for the figure
252
- fig.suptitle(f"Price vs SalesVolume for {st.session_state['selected_item_type']} across years")
253
-
254
- # Adjust layout to prevent overlap
255
- fig.tight_layout(rect=[0, 0, 1, 0.95])
256
-
257
- # Display the plot
258
- st.pyplot(fig)
259
-
260
- ###############################################################################################
261
-
262
- ########################################### CARD #4 ####################################################
263
- if st.session_state['active_card'] == 'card4':
264
- # Define the fiscal years
265
- years = ['FY 2021', 'FY 2022', 'FY 2023', 'FY 2024']
266
- df = df.groupby(['FyWeek','Fy','Chaincode','Store','Address','Zipcode','City','State','Containercode','Itemtype'],observed=True).agg({
267
- 'SalesVolume': 'sum',
268
- 'UnitPrice':'mean',
269
- 'Sales': 'sum'}).reset_index()
270
-
271
- # Dropdown for selecting the Item Type (using session_state)
272
- st.session_state['selected_item_type'] = st.selectbox('Select Item Type',
273
- df['Itemtype'].unique(),
274
- index=list(df['Itemtype'].unique()).index(st.session_state['selected_item_type']))
275
-
276
- # Set up a 2x2 grid of subplots for the four years
277
- fig, axes = plt.subplots(2, 2, figsize=(16, 12))
278
- axes = axes.flatten() # To access axes easily in a loop
279
-
280
- # Loop through each year and plot the data
281
- for i, year in enumerate(years):
282
- # print(st.session_state['selected_item_type'])
283
- # Filter data for the specific year and item type selected
284
- cage_data = df[(df['Itemtype'] == st.session_state['selected_item_type']) & (df['Fy'] == year)]
285
- cage_data['Itemtype'] = cage_data['Itemtype'].cat.remove_unused_categories()
286
- cage_data['Containercode'] = cage_data['Containercode'].cat.remove_unused_categories()
287
- # print(cage_data['Itemtype'].unique())
288
- relevant_container_codes = cage_data['Containercode'].unique()
289
- print(relevant_container_codes)
290
- # Calculate price bands
291
- lower_band = cage_data['UnitPrice'].quantile(0.25)
292
- median_band = cage_data['UnitPrice'].quantile(0.50)
293
- higher_band = cage_data['UnitPrice'].quantile(0.75)
294
-
295
- # Get data for each price band
296
- lower_band_data = cage_data[cage_data['UnitPrice'] <= lower_band]
297
- median_band_data = cage_data[(cage_data['UnitPrice'] > lower_band) & (cage_data['UnitPrice'] <= median_band)]
298
- higher_band_data = cage_data[cage_data['UnitPrice'] > higher_band]
299
- # print(lower_band_data['Containercode'].unique())
300
- # Aggregate SalesVolume and average UnitPrice for each container code in each pricing band
301
- lower_band_agg = lower_band_data.groupby('Containercode',observed=True).agg(
302
- total_sales_volume=('SalesVolume', 'sum'),
303
- avg_unit_price=('UnitPrice', 'mean')
304
- ).reset_index()
305
-
306
- median_band_agg = median_band_data.groupby('Containercode',observed=True).agg(
307
- total_sales_volume=('SalesVolume', 'sum'),
308
- avg_unit_price=('UnitPrice', 'mean')
309
- ).reset_index()
310
-
311
- higher_band_agg = higher_band_data.groupby('Containercode',observed=True).agg(
312
- total_sales_volume=('SalesVolume', 'sum'),
313
- avg_unit_price=('UnitPrice', 'mean')
314
- ).reset_index()
315
-
316
- # Add the price band labels
317
- lower_band_agg['PriceBand'] = 'Lower Band'
318
- median_band_agg['PriceBand'] = 'Median Band'
319
- higher_band_agg['PriceBand'] = 'Higher Band'
320
-
321
- # Combine the data for plotting
322
- combined_data = pd.concat([lower_band_agg, median_band_agg, higher_band_agg])
323
- combined_data = combined_data[combined_data['Containercode'].isin(relevant_container_codes)]
324
-
325
- # Plot Total Sales Volume for each price band in the current subplot
326
- sns.barplot(x='Containercode', y='total_sales_volume', hue='PriceBand', data=combined_data, ax=axes[i])
327
-
328
- # Set the title and customize x-axis for each subplot
329
- axes[i].set_title(f"Total Sales Volume by Container Code and Price Band for {year}")
330
- axes[i].set_xlabel('Container Code')
331
- axes[i].set_ylabel('Total Sales Volume')
332
- axes[i].tick_params(axis='x', rotation=45)
333
-
334
- # Adjust the layout so titles and labels don't overlap
335
- plt.tight_layout()
336
-
337
- # Display the plot in Streamlit
338
- st.pyplot(fig)
339
-
340
- ###############################################################################################
 
5
  import seaborn as sns
6
  import matplotlib.dates as mdates
7
  import plotly.express as px
8
+ import plotly.graph_objects as go
9
  import re
10
  from datetime import datetime, timedelta
11
  import warnings
12
+ import time
13
+ import dask.dataframe as dd
14
+
15
@st.cache_data
def date_from_week(year, week):
    """Return the start date of each fiscal week.

    Week 1 is anchored on 1 August of the calendar year preceding ``year``;
    week ``n`` is that anchor plus ``(n - 1) * 7`` days.  ``year`` must
    support ``.astype`` (the caller in this file passes pandas Series for
    both arguments).
    """
    # Anchor: "<year - 1>-08-01", parsed into datetimes.
    fiscal_start = pd.to_datetime((year - 1).astype(str) + '-08-01')
    # Whole weeks elapsed since week 1, expressed in days.
    elapsed_days = (week - 1) * 7
    return fiscal_start + pd.to_timedelta(elapsed_days, unit='days')
21
+
22
+ @st.cache_data
23
+ def load_data(active_card):
24
+ # Read the columns of "fy21-24.csv" needed by `active_card` and return them as a computed DataFrame.
25
+ # Fallback column set, used when `active_card` has no entry in card_specific_cols below.
26
+ common_cols = ['FyWeek', 'Itemtype', 'Chaincode', 'State', 'SalesVolume', 'UnitPrice', 'Sales']
27
+
28
+ # Columns to read for each card, keyed by the card name
29
+ card_specific_cols = {
30
+ 'card1': ['FyWeek', 'State', 'Itemtype', 'Chaincode', 'SalesVolume'],
31
+ 'card2': ['FyWeek', 'Fy', 'State','Store','Address','Zipcode','City','Itemtype', 'Chaincode', 'Containercode', 'SalesVolume', 'UnitPrice', 'Sales'],
32
+ }
33
+
34
+ # Choose columns based on the active card (unknown cards get common_cols)
35
+ required_columns = card_specific_cols.get(active_card, common_cols)
36
+
37
+ # Explicit dtypes handed to read_csv; covers every column any card can request
38
+ dtype_spec = {
39
+ 'FyWeek': 'string',
40
+ 'Fy': 'category', # Add data type for 'Fy' if it's used
41
+ 'Itemtype': 'category',
42
+ 'Chaincode': 'category',
43
+ 'State': 'category',
44
+ "Store": "category",
45
+ 'Containercode': 'category',
46
+ "Address": "string",
47
+ "Zipcode": "float",
48
+ "City": "category",
49
+ 'SalesVolume': 'float',
50
+ 'UnitPrice': 'float',
51
+ 'Sales': 'float'
52
+ }
53
+
54
+ # Read only the necessary columns
55
+ # The CSV is read through dask; compute() below materialises the result
56
+ ddf = dd.read_csv("fy21-24.csv", usecols=required_columns, dtype=dtype_spec)
57
+ df = ddf.compute()
58
+ # NOTE(review): this diff view drops indentation; the FY/Week/Year/Dt lines after the groupby presumably sit inside the card2 branch - confirm against app.py
59
+ # card2: collapse to one row per observed key combination (sum of SalesVolume and Sales, mean of UnitPrice)
60
+ if active_card in ['card2']:
61
+ df = df.groupby(['FyWeek', 'Fy', 'Chaincode', 'Store', 'Address', 'Zipcode', 'City', 'State', 'Containercode', 'Itemtype'], observed=True).agg({
62
+ 'SalesVolume': 'sum',
63
+ 'UnitPrice': 'mean',
64
+ 'Sales': 'sum'
65
+ }).reset_index()
66
+ df[['FY', 'Week']] = df['FyWeek'].str.split(' Week ', expand=True) # text before / after the ' Week ' separator
67
+ df['Week'] = df['Week'].astype(int) # Convert 'Week' to int
68
+ df['Year'] = df['FY'].str[2:].astype(int) # Drop the first two characters, then cast to int; assumes FyWeek looks like 'FY 2021 Week 5' - TODO confirm
69
+ df['Dt'] = date_from_week(df['Year'], df['Week']) # date derived from Year and Week by date_from_week
70
+
71
+ # st.write(df.columns)
72
+ return df
73
+
74
+ # Display logo
75
+ st.image("bonnie.png", width=150) # Adjust width as needed
76
+
77
+ # Display title
78
+ st.title("Bonnie Plants Pricing & Sales Analytics Dashboard")
79
+
80
+ # Close the div for logo and title
81
+ st.markdown('</div>', unsafe_allow_html=True)
82
+
83
+ # Initialize session state for storing which card was clicked and item type
84
  if 'active_card' not in st.session_state:
85
  st.session_state['active_card'] = None
86
  if 'selected_item_type' not in st.session_state:
87
  st.session_state['selected_item_type'] = 'CORE' # Set default to 'CORE'
88
+
 
 
89
  if 'selected_feature' not in st.session_state:
90
  st.session_state['selected_feature'] = 'Chaincode' # Default to 'Chain Code'
91
 
92
+ # Card selection buttons
93
+ col1, col2 = st.columns(2)
 
94
  # Define buttons for plot categories, update session state when clicked
95
  with col1:
96
  if st.button("Sales Volume Trend for Item Category"):
 
100
  if st.button("Sales Volume & Unit Price Correlation for Item Category and Container Code"):
101
  st.session_state['active_card'] = 'card2'
102
 
103
+ start_time=time.time()
104
+ # st.write(st.session_state['active_card'])
105
+ df = load_data(st.session_state['active_card'])
106
+ time_taken = time.time() - start_time
107
+ st.write(f"Data loaded in {time_taken:.2f} seconds")
108
 
 
 
 
109
 
110
+ # Initialize session state for storing the selected state and feature
111
+ if 'selected_state' not in st.session_state:
112
+ st.session_state['selected_state'] = df['State'].unique()[0] # Default to the first state
113
 
114
+ ############################################ CARD #1 ####################################################
115
  if st.session_state['active_card'] == 'card1':
116
+ # st.write("Processing card1...")
117
+ # Dropdown for selecting the state
118
+ selected_state = st.selectbox('Select State', df['State'].unique())
119
+ # Dropdown for selecting the feature for grouping
120
+ selected_feature = st.selectbox('Select Feature for Grouping', ['Chaincode', 'Itemtype',])
121
+
 
 
 
 
 
 
 
 
 
 
 
 
 
122
  # Filter the dataframe based on selected state
123
+ filtered_df = df[df['State'] == selected_state]
124
+
125
+ # Time the grouping operation
126
+ start_time = time.time()
127
+ group_data = filtered_df.groupby(['FyWeek', selected_feature],observed=True)['SalesVolume'].sum().reset_index()
128
+ time_taken = time.time() - start_time
129
+
130
+ # Plotting
131
+ fig = px.bar(group_data, x='FyWeek', y='SalesVolume', color=selected_feature,
132
+ title=f'Sales Volume over Fiscal Week in {selected_state} by {selected_feature}',
133
+ labels={'SalesVolume': 'Sales Volume', 'Fiscal Week Short': 'Fiscal Week'})
 
 
 
 
 
 
 
 
 
 
 
 
 
 
134
  st.plotly_chart(fig)
135
+
136
  ##########################################################################################################
137
 
138
+
139
  ########################################### CARD #2 ####################################################
140
+
141
+
142
+
143
+
144
+
145
  if st.session_state['active_card'] == 'card2':
146
  # Dropdown to select item type (using session_state)
147
  st.session_state['selected_item_type'] = st.selectbox(
 
151
  # Dropdown to select the grouping category (container code, chain code, or state)
152
  group_by_option = st.selectbox('Group by', ['Containercode', 'Chaincode', 'State'])
153
 
154
+ # Multi-select checkbox to select multiple years
155
+ selected_years = st.multiselect('Select Year(s)', [2021, 2022, 2023, 2024], default=[2021])
 
 
 
 
 
 
156
 
157
+ st.subheader(f"Sales Volume & Unit Price Correlation for {group_by_option} in {', '.join(map(str, selected_years))}")
 
 
 
 
158
 
159
+ # Convert 'Dt' column to datetime
 
160
  df['Dt'] = pd.to_datetime(df['Dt'], errors='coerce')
161
  df['Promo'] = np.where(df['Dt'].dt.month.astype(str).isin(['3', '4', '5', '6']), 'Promo', 'NoPromo')
162
  df["Promo"] = df["Promo"].astype("category")
 
 
 
 
163
 
164
+ # Filter the dataframe based on the selected item type and selected years
165
+ filtered_df = df[(df['Itemtype'] == st.session_state['selected_item_type']) & (df['Dt'].dt.year.isin(selected_years))]
166
 
167
  # Find the top 3 values based on total SalesVolume in the selected grouping category
168
  top_3_values = filtered_df.groupby(group_by_option, observed=True)['SalesVolume'].sum().nlargest(3).index
 
170
  # Filter the data for only the top 3 values
171
  top_group_data = filtered_df[filtered_df[group_by_option].isin(top_3_values)]
172
 
173
+ # Aggregate data
174
  agg_df = top_group_data.groupby([group_by_option, 'Year', 'Week', 'Dt'], observed=True).agg({
175
  'SalesVolume': 'sum',
176
  'UnitPrice': 'mean'
177
  }).reset_index()
178
 
179
+ # Create a new column 'week-year' for X-axis labels
180
+ agg_df['week-year'] = agg_df['Dt'].dt.strftime('%U-%Y')
181
+
182
+ # Loop through the top 3 values and create separate plots using Plotly
183
  for value in top_3_values:
184
  value_data = agg_df[agg_df[group_by_option] == value]
185
+ # Assuming you have 'value_data' from your previous code
186
+ mean_sales_volume = value_data['SalesVolume'].mean()
187
+ mean_unit_price = value_data['UnitPrice'].mean()
188
+
189
+ # Create a Plotly figure
190
+ fig = go.Figure()
191
+
192
+ # Add SalesVolume trace
193
+ fig.add_trace(go.Scatter(
194
+ x=value_data['week-year'],
195
+ y=value_data['SalesVolume'],
196
+ mode='lines+markers',
197
+ name='SalesVolume',
198
+ line=dict(color='blue'),
199
+ hovertemplate='SalesVolume: %{y}<br>Week-Year: %{x}'
200
+ ))
201
+
202
+ # Add UnitPrice trace on a secondary Y-axis
203
+ fig.add_trace(go.Scatter(
204
+ x=value_data['week-year'],
205
+ y=value_data['UnitPrice'],
206
+ mode='lines+markers',
207
+ name='UnitPrice',
208
+ line=dict(color='green'),
209
+ yaxis='y2',
210
+ hovertemplate='UnitPrice: %{y}<br>Week-Year: %{x}'
211
+ ))
212
+ # Add mean line for SalesVolume
213
+ fig.add_shape(type="line",
214
+ x0=value_data['week-year'].min(), x1=value_data['week-year'].max(),
215
+ y0=mean_sales_volume, y1=mean_sales_volume,
216
+ line=dict(color="blue", width=2, dash="dash"),
217
+ xref='x', yref='y')
218
+
219
+ # Add mean line for UnitPrice (on secondary Y-axis)
220
+ fig.add_shape(type="line",
221
+ x0=value_data['week-year'].min(), x1=value_data['week-year'].max(),
222
+ y0=mean_unit_price, y1=mean_unit_price,
223
+ line=dict(color="green", width=2, dash="dash"),
224
+ xref='x', yref='y2')
225
+
226
+ # Update layout for dual axes
227
+ fig.update_layout(
228
+ template='plotly_white',
229
+ title=f"SalesVolume and UnitPrice - {value} ({group_by_option})",
230
+ xaxis_title='Week-Year',
231
+ yaxis_title='Sales Volume',
232
+ yaxis2=dict(title='UnitPrice', overlaying='y', side='right'),
233
+ legend=dict(x=0.9, y=1.15),
234
+ hovermode="x unified", # Show both values in a tooltip
235
+ height=600,
236
+ margin=dict(l=50, r=50, t=50, b=50)
237
+ )
238
+
239
+ # Rotate X-axis labels
240
+ fig.update_xaxes(tickangle=90)
241
+
242
+ # Display the Plotly figure in Streamlit
243
+ st.plotly_chart(fig, use_container_width=True)
244
 
 
 
245
 
 
 
 
 
246
 
247
+ ##########################################################################################################