"""Streamlit dashboard: price vs. sales-volume tracking.

Two interactive views ("cards") are exposed:

* card1 -- drill-down sales-volume trends: total -> state -> chain code
  -> item type, plus top/bottom container codes and stores.
* card3 -- year-over-year and month-over-month price-elasticity (PE)
  coefficient trends per region.

(card2, "Sales Volume vs Median Unit Price Trend", is currently disabled.)
"""

import streamlit as st
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib.dates as mdates
import plotly.express as px
import plotly.graph_objects as go
import re
from datetime import datetime, timedelta
import warnings
import time
import dask.dataframe as dd

# Mapping of US state abbreviations to the sales regions used for the
# region-level PE analysis (card3).
state_to_region = {
    # WEST
    'AK': 'WEST', 'CA': 'WEST', 'CO': 'WEST', 'HI': 'WEST', 'ID': 'WEST',
    'MT': 'WEST', 'NV': 'WEST', 'OR': 'WEST', 'UT': 'WEST', 'WA': 'WEST',
    'WY': 'WEST',
    # SOUTHWEST
    'AZ': 'SOUTHWEST', 'NM': 'SOUTHWEST', 'OK': 'SOUTHWEST', 'TX': 'SOUTHWEST',
    # MIDWEST
    'IL': 'MIDWEST', 'IN': 'MIDWEST', 'IA': 'MIDWEST', 'KS': 'MIDWEST',
    'MI': 'MIDWEST', 'MN': 'MIDWEST', 'MO': 'MIDWEST', 'NE': 'MIDWEST',
    'ND': 'MIDWEST', 'OH': 'MIDWEST', 'SD': 'MIDWEST', 'WI': 'MIDWEST',
    # SOUTHEAST
    'AL': 'SOUTHEAST', 'AR': 'SOUTHEAST', 'DE': 'SOUTHEAST', 'FL': 'SOUTHEAST',
    'GA': 'SOUTHEAST', 'KY': 'SOUTHEAST', 'LA': 'SOUTHEAST', 'MD': 'SOUTHEAST',
    'MS': 'SOUTHEAST', 'NC': 'SOUTHEAST', 'SC': 'SOUTHEAST', 'TN': 'SOUTHEAST',
    'VA': 'SOUTHEAST', 'WV': 'SOUTHEAST',
    # NORTHEAST
    'CT': 'NORTHEAST', 'ME': 'NORTHEAST', 'MA': 'NORTHEAST', 'NH': 'NORTHEAST',
    'NJ': 'NORTHEAST', 'NY': 'NORTHEAST', 'PA': 'NORTHEAST', 'RI': 'NORTHEAST',
    'VT': 'NORTHEAST',
}


@st.cache_data
def date_from_week(year, week):
    """Convert fiscal (year, week) Series into the week's start date.

    The fiscal year is assumed to begin on August 1st of the *previous*
    calendar year, with fiscal week 1 starting on that date.

    Args:
        year: integer Series of fiscal years (e.g. 2021).
        week: integer Series of 1-based fiscal week numbers.

    Returns:
        A datetime Series giving the start date of each fiscal week.
    """
    base_date = pd.to_datetime((year - 1).astype(str) + '-08-01')
    return base_date + pd.to_timedelta((week - 1) * 7, unit='days')


@st.cache_data
def load_data(active_card):
    """Load and pre-aggregate the sales CSV for the given dashboard card.

    Args:
        active_card: 'card1' / 'card2' / 'card3', or None when no card
            has been selected yet.

    Returns:
        The aggregated DataFrame (with FY/Week/Dt/Region columns added),
        or None when ``active_card`` is None.

    NOTE(review): previously this function returned None *implicitly*
    for any card not in the known list, and it read the CSV even before
    a card was chosen; both are fixed below.
    """
    if active_card is None:
        # Skip the expensive CSV read on the initial page load.
        return None

    # Columns common to any card that has no specific column list.
    common_cols = ['FyWeek', 'Itemtype', 'Chaincode', 'State',
                   'SalesVolume', 'UnitPrice', 'Sales']

    # Per-card column requirements (card2 is currently disabled).
    detail_cols = ['FyWeek', 'Fy', 'State', 'Store', 'Address', 'Zipcode',
                   'City', 'Itemtype', 'Chaincode', 'Containercode',
                   'SalesVolume', 'UnitPrice', 'Sales']
    card_specific_cols = {
        'card1': detail_cols,
        'card3': detail_cols,  # PE-calculation card uses the same detail set
    }
    required_columns = card_specific_cols.get(active_card, common_cols)

    # Explicit dtypes keep memory usage down on the large CSV.
    dtype_spec = {
        'FyWeek': 'string',
        'Fy': 'category',
        'Itemtype': 'category',
        'Chaincode': 'category',
        'State': 'category',
        'Store': 'category',
        'Containercode': 'category',
        'Address': 'string',
        'Zipcode': 'float',
        'City': 'category',
        'SalesVolume': 'float',
        'UnitPrice': 'float',
        'Sales': 'float',
    }

    # Dask reads only the needed columns, then the result is materialized.
    ddf = dd.read_csv("fy21-24.csv", usecols=required_columns, dtype=dtype_spec)
    df = ddf.compute()

    if active_card in ('card1', 'card2', 'card3'):
        df = df.groupby(
            ['FyWeek', 'Fy', 'Chaincode', 'Store', 'Address', 'Zipcode',
             'City', 'State', 'Containercode', 'Itemtype'],
            observed=True,
        ).agg({
            'SalesVolume': 'sum',
            'UnitPrice': 'mean',
            'Sales': 'sum',
        }).reset_index()

        # 'FyWeek' looks like "FY2021 Week 13" -> FY="FY2021", Week=13.
        df[['FY', 'Week']] = df['FyWeek'].str.split(' Week ', expand=True)
        df['Week'] = df['Week'].astype(int)
        df['Year'] = df['FY'].str[2:].astype(int)  # "FY2021" -> 2021
        df['Dt'] = date_from_week(df['Year'], df['Week'])

        # Region is derived from the state for region-level analysis.
        df['Region'] = df['State'].map(state_to_region)

    return df


def _sorted_by_fy_week(frame):
    """Split 'FY_Week' ("FY2021_13") back into FY/Week and sort chronologically."""
    frame[['FY', 'Week']] = frame['FY_Week'].str.split('_', expand=True)
    frame['Week'] = frame['Week'].astype(int)
    return frame.sort_values(by=['FY', 'Week'])


def _reset_drilldown():
    """Clear all drill-down selections when switching between cards."""
    st.session_state['selected_state'] = None
    st.session_state['selected_chaincode'] = None
    st.session_state['selected_itemtype'] = None
    st.session_state['selected_containercode'] = None


# Display logo
st.image("bonnie.png", width=150)  # Adjust width as needed
# st.title("Price vs. Sales Volume Tracker Dashboard")

# Initialize session state for the active card and default selections.
if 'active_card' not in st.session_state:
    st.session_state['active_card'] = None
if 'selected_item_type' not in st.session_state:
    st.session_state['selected_item_type'] = 'CORE'  # Default item type
if 'selected_feature' not in st.session_state:
    st.session_state['selected_feature'] = 'Chaincode'  # Default feature

# Card selection buttons; switching cards resets the drill-down state.
# (The middle column / card2 button is currently disabled.)
col1, col3 = st.columns(2)
with col1:
    if st.button("Sales Volume Trend"):
        st.session_state['active_card'] = 'card1'
        _reset_drilldown()
with col3:
    if st.button("Price Elasticity Coefficient Trend YoY"):
        st.session_state['active_card'] = 'card3'
        _reset_drilldown()

# Load data for the current card (None until a card is chosen).
df = None
if st.session_state['active_card'] is not None:
    start_time = time.time()
    df = load_data(st.session_state['active_card'])
    st.write(f"Data loaded in {time.time() - start_time:.2f} seconds")

############################################ CARD #1 ####################################################
if st.session_state['active_card'] == 'card1' and df is not None:
    # Step 1: total sales volume vs fiscal week for the whole dataset.
    st.subheader("Total Sales Volume by Fiscal Week")
    df['FY_Week'] = df['FY'].astype(str) + '_' + df['Week'].astype(str)

    if not df.empty and 'FY_Week' in df.columns:
        total_sales_df = (df.groupby('FY_Week', observed=True)['SalesVolume']
                            .sum().reset_index())
        total_sales_df = _sorted_by_fy_week(total_sales_df)
        fig = px.line(total_sales_df, x='FY_Week', y='SalesVolume',
                      labels={'SalesVolume': 'Sales Volume',
                              'FY_Week': 'Fiscal Week'})
        st.plotly_chart(fig)

    # Step 2: top 3 states by sales volume, shown as drill-down buttons.
    top_states = (df.groupby('State', observed=True)['SalesVolume']
                    .sum().nlargest(3).index)
    st.write("### Top 3 Selling States in the last 4 years (drill down by state)")
    col1, col2, col3 = st.columns(3)
    for idx, col in enumerate((col1, col2, col3)):
        if len(top_states) > idx and col.button(top_states[idx]):
            st.session_state['selected_state'] = top_states[idx]

    # Step 3: sales volume vs fiscal week for the selected state.
    # NOTE: truthiness check guards against the None set by _reset_drilldown().
    if st.session_state.get('selected_state'):
        selected_state = st.session_state['selected_state']
        st.subheader(f"Sales Volume by Fiscal Week for {selected_state} (drill down by Chaincode) ")
        state_df = df[df['State'] == selected_state]
        state_sales_df = (state_df.groupby('FY_Week', observed=True)['SalesVolume']
                                  .sum().reset_index())
        if not state_sales_df.empty and 'FY_Week' in state_sales_df.columns:
            state_sales_df = _sorted_by_fy_week(state_sales_df)
            fig = px.line(state_sales_df, x='FY_Week', y='SalesVolume',
                          labels={'SalesVolume': 'Sales Volume',
                                  'FY_Week': 'Fiscal Week'})
            st.plotly_chart(fig)

        # Step 4: top 3 chain codes in the selected state.
        top_chaincodes = (state_df.groupby('Chaincode', observed=True)['SalesVolume']
                                  .sum().nlargest(3).index)
        st.write(f"### Top 3 selling Chaincode in {selected_state}:")
        col1, col2, col3 = st.columns(3)
        for idx, col in enumerate((col1, col2, col3)):
            if len(top_chaincodes) > idx and col.button(top_chaincodes[idx]):
                st.session_state['selected_chaincode'] = top_chaincodes[idx]

        # Step 5: sales volume vs fiscal week for the selected chain code.
        # BUG FIX: original only tested key presence, so the None written by
        # _reset_drilldown() produced empty "for None" plots.
        if st.session_state.get('selected_chaincode'):
            selected_chaincode = st.session_state['selected_chaincode']
            st.subheader(f"Sales Volume by Fiscal Week for {selected_chaincode} in {selected_state}")
            chain_df = state_df[state_df['Chaincode'] == selected_chaincode]
            chain_sales_df = (chain_df.groupby('FY_Week', observed=True)['SalesVolume']
                                      .sum().reset_index())
            if not chain_sales_df.empty and 'FY_Week' in chain_sales_df.columns:
                chain_sales_df = _sorted_by_fy_week(chain_sales_df)
                fig = px.line(chain_sales_df, x='FY_Week', y='SalesVolume',
                              labels={'SalesVolume': 'Sales Volume',
                                      'FY_Week': 'Fiscal Week'})
                st.plotly_chart(fig)

            # Step 6: top 3 item types for the selected state + chain code.
            top_itemtypes = (chain_df.groupby('Itemtype', observed=True)['SalesVolume']
                                     .sum().nlargest(3).index)
            st.write(f"### Top Item Type in {selected_chaincode}, {selected_state} (drill down by ItemType) :")
            col1, col2, col3 = st.columns(3)
            for idx, col in enumerate((col1, col2, col3)):
                if len(top_itemtypes) > idx and col.button(top_itemtypes[idx]):
                    st.session_state['selected_itemtype'] = top_itemtypes[idx]

            # Step 7: dual-axis SalesVolume / UnitPrice plot for the item type.
            if st.session_state.get('selected_itemtype'):
                selected_itemtype = st.session_state['selected_itemtype']
                item_df = chain_df[chain_df['Itemtype'] == selected_itemtype]
                item_sales_df = item_df.groupby('FY_Week', observed=True).agg({
                    'SalesVolume': 'sum',
                    'UnitPrice': 'mean',
                }).reset_index()

                if not item_sales_df.empty and 'FY_Week' in item_sales_df.columns:
                    item_sales_df = _sorted_by_fy_week(item_sales_df)

                    fig = go.Figure()
                    fig.add_trace(go.Scatter(
                        x=item_sales_df['FY_Week'],
                        y=item_sales_df['SalesVolume'],
                        mode='lines+markers',
                        name='SalesVolume',
                        line=dict(color='blue'),
                        hovertemplate='SalesVolume: %{y}<br>Week-Year: %{x}',
                    ))
                    # UnitPrice on a secondary Y-axis.
                    fig.add_trace(go.Scatter(
                        x=item_sales_df['FY_Week'],
                        y=item_sales_df['UnitPrice'],
                        mode='lines+markers',
                        name='UnitPrice',
                        line=dict(color='green'),
                        yaxis='y2',
                        hovertemplate='UnitPrice: %{y}<br>Week-Year: %{x}',
                    ))
                    fig.update_layout(
                        title=f"Sales Volume vs Unit Price by Fiscal Week for {selected_itemtype}, {selected_chaincode}, {selected_state}",
                        xaxis_title='Fiscal Week',
                        yaxis_title='Sales Volume',
                        yaxis2=dict(title='Unit Price', overlaying='y', side='right'),
                        legend=dict(x=0.9, y=1.15),
                        hovermode="x unified",  # Show both values in one tooltip
                        height=600,
                        margin=dict(l=50, r=50, t=50, b=50),
                    )
                    fig.update_xaxes(tickangle=90)
                    st.plotly_chart(fig, use_container_width=True)

                # Step 8: top/bottom container codes and stores for the slice.
                st.subheader("Top & Bottom 3 Container Codes and Stores")
                container_totals = (item_df.groupby('Containercode', observed=True)['SalesVolume']
                                           .sum())
                store_totals = (item_df.groupby('Store', observed=True)['SalesVolume']
                                       .sum())
                top_containercodes = container_totals.nlargest(3).reset_index()
                bottom_containercodes = container_totals.nsmallest(3).reset_index()
                top_stores = store_totals.nlargest(3).reset_index()
                bottom_stores = store_totals.nsmallest(3).reset_index()

                st.write("### Container Codes:")
                col1, col2 = st.columns(2)
                with col1:
                    st.write("#### Top 3 Container Codes")
                    st.dataframe(top_containercodes)
                with col2:
                    st.write("#### Bottom 3 Container Codes")
                    st.dataframe(bottom_containercodes)

                st.write("### Stores:")
                col3, col4 = st.columns(2)
                with col3:
                    st.write("#### Top 3 Stores")
                    st.dataframe(top_stores)
                with col4:
                    st.write("#### Bottom 3 Stores")
                    st.dataframe(bottom_stores)

##########################################################################################################
# CARD #2 ("Sales Volume vs Median Unit Price Trend") is disabled; its
# implementation was removed as dead code. Restore from version control
# if the card is re-enabled.
##########################################################################################################

############################################ CARD #3 ####################################################
if st.session_state['active_card'] == 'card3' and df is not None:
    # Item-type and region filters.
    item_type_options = df['Itemtype'].unique()
    selected_item_type = st.selectbox("Select Item Type", item_type_options)
    region_options = df['Region'].dropna().unique()
    selected_regions = st.multiselect("Select Region(s)", region_options,
                                      default=region_options)

    filtered_df = df[(df['Itemtype'] == selected_item_type)
                     & (df['Region'].isin(selected_regions))]

    # Aggregate per fiscal year / region / item type.
    # observed=True avoids cartesian rows for unobserved category combos
    # (consistent with every other groupby in this file).
    agg_df = filtered_df.groupby(['Fy', 'Region', 'Itemtype'], observed=True).agg({
        'SalesVolume': 'sum',
        'UnitPrice': 'mean',
    }).reset_index()

    # Sort so pct_change compares consecutive fiscal years within a group.
    agg_df = agg_df.sort_values(by=['Region', 'Itemtype', 'Fy'])

    # YoY percentage changes and the resulting PE coefficient.
    grp = agg_df.groupby(['Region', 'Itemtype'], observed=True)
    agg_df['SalesVolume_pct_change'] = grp['SalesVolume'].pct_change().round(3) * 100
    agg_df['UnitPrice_pct_change'] = grp['UnitPrice'].pct_change().round(3) * 100
    agg_df['PE_Coeff'] = (agg_df['SalesVolume_pct_change']
                          / agg_df['UnitPrice_pct_change']).round(2)

    # Exclude FY 2025 but keep FY 2021 even with NaN pct-changes; then drop
    # rows where the PE coefficient could not be computed.
    agg_df_filtered = agg_df[agg_df['Fy'] != 'FY 2025']
    agg_df_filtered = agg_df_filtered.dropna(subset=['PE_Coeff'])
    agg_df_filtered = agg_df_filtered.rename(columns={
        'SalesVolume_pct_change': 'SlVol%change',
        'UnitPrice_pct_change': 'UnPr%change',
    })
    agg_df_filtered = agg_df_filtered.reset_index(drop=True)

    st.dataframe(agg_df_filtered)
    st.write(agg_df_filtered.shape)

    # Worked example: illustrate the PE calculation with the first two rows.
    # BUG FIX: guard against fewer than 2 rows (IndexError) and a zero
    # unit-price change (ZeroDivisionError) in the original.
    if len(agg_df_filtered) >= 2:
        current_year_row = agg_df_filtered.iloc[1]   # Row 1 - Current Year
        previous_year_row = agg_df_filtered.iloc[0]  # Row 0 - Previous Year

        unit_price_current_year = current_year_row['UnitPrice']
        unit_price_previous_year = previous_year_row['UnitPrice']
        sales_volume_current_year = current_year_row['SalesVolume']
        sales_volume_previous_year = previous_year_row['SalesVolume']

        unit_price_pct = ((unit_price_current_year - unit_price_previous_year)
                          / unit_price_previous_year) * 100
        sales_volume_pct = ((sales_volume_current_year - sales_volume_previous_year)
                            / sales_volume_previous_year) * 100
        pe_coeff = (sales_volume_pct / unit_price_pct
                    if unit_price_pct != 0 else float('nan'))

        st.markdown('''### Calculations for Price Elasticity Coefficient''')
        st.latex(rf"""
        \text{{Unit Price \% Change}} = \frac{{{unit_price_current_year:.2f} - {unit_price_previous_year:.2f}}}{{{unit_price_previous_year:.2f}}} \times 100 = {unit_price_pct:.2f}\%
        """)
        st.latex(rf"""
        \text{{Sales Volume \% Change}} = \frac{{{sales_volume_current_year:.2f} - {sales_volume_previous_year:.2f}}}{{{sales_volume_previous_year:.2f}}} \times 100 = {sales_volume_pct:.2f}\%
        """)
        st.latex(rf"""
        \text{{PE Coefficient}} = \frac{{{sales_volume_pct:.2f}}}{{{unit_price_pct:.2f}}} = {pe_coeff:.2f}
        """)

        st.markdown("""
        ### Interpretation of Price Elasticity (PE) Coefficient:
        The Price Elasticity (PE) coefficient reflects how sensitive sales volume is to changes in unit price.
        - If the **PE coefficient is positive**:
          1. When the price increases, sales volume increases.
          2. When the price decreases, sales volume decreases.
        - If the **PE coefficient is negative**:
          1. When the price increases, sales volume decreases.
          2. When the price decreases, sales volume increases.
        """)

        # Dynamic commentary based on the signs of the two changes.
        if unit_price_pct > 0 and sales_volume_pct > 0:
            st.warning(f"""
            Both unit price and sales volume increased (refer first and second row of the table).
            The PE coefficient of **{pe_coeff:.2f}** indicates that for every 1% increase in unit price, sales volume increased by approximately **{pe_coeff:.2f}%**.
            """)
        elif unit_price_pct < 0 and sales_volume_pct < 0:
            st.warning(f"""
            Both unit price and sales volume decreased (refer first and second row of the table).
            The PE coefficient of **{pe_coeff:.2f}** suggests that for every 1% decrease in unit price, sales volume decreased by approximately **{pe_coeff:.2f}%**.
            """)
        elif unit_price_pct > 0 and sales_volume_pct < 0:
            st.warning(f"""
            The unit price increased while sales volume decreased (refer first and second row of the table).
            The negative PE coefficient of **{pe_coeff:.2f}** means that for every 1% increase in unit price, sales volume fell by approximately **{abs(pe_coeff):.2f}%**.
            """)
        elif unit_price_pct < 0 and sales_volume_pct > 0:
            st.warning(f"""
            The unit price decreased while sales volume increased (refer first and second row of the table).
            The negative PE coefficient of **{pe_coeff:.2f}** implies that for every 1% decrease in unit price, sales volume increased by approximately **{abs(pe_coeff):.2f}%**.
            """)
    else:
        st.info("Not enough year-over-year data to illustrate the PE calculation.")

    # Yearly PE-coefficient trend, one line per region.
    fig = px.line(
        agg_df_filtered,
        x='Fy',
        y='PE_Coeff',
        color='Region',
        title=f"Price Elasticity Coefficient (PE) by Year for {selected_item_type}",
        labels={'Fy': 'Fiscal Year', 'PE_Coeff': 'Price Elasticity Coefficient'},
        markers=True,
    )
    fig.update_layout(height=600, width=1000)
    st.plotly_chart(fig, use_container_width=True)

    #################### CARD-3 MONTHLY IMPLEMENTATION #########################
    # Month-over-month PE coefficients, keyed by calendar year/month.
    df['Dt'] = pd.to_datetime(df['Dt'])
    # NOTE(review): 'FY' is overwritten with the *calendar* year here; the
    # fiscal-year column is not used again below.
    df['FY'] = df['Dt'].dt.year.astype(str)
    df['Month'] = df['Dt'].dt.month.astype(str)
    df['FY_Month'] = df['FY'] + '_' + df['Month']

    filtered_df = df[(df['Itemtype'] == selected_item_type)
                     & (df['Region'].isin(selected_regions))]

    agg_df = filtered_df.groupby(['FY_Month', 'Region', 'Itemtype'],
                                 observed=True).agg({
        'SalesVolume': 'sum',
        'UnitPrice': 'mean',
    }).reset_index()

    # Split FY_Month back out and build a real datetime for correct ordering.
    agg_df[['FY', 'Month']] = agg_df['FY_Month'].str.split('_', expand=True)
    agg_df['Month'] = agg_df['Month'].astype(int)
    agg_df['FY'] = agg_df['FY'].astype(int)
    agg_df['FY_Month_dt'] = pd.to_datetime(
        agg_df['FY'].astype(str) + agg_df['Month'].astype(str).str.zfill(2),
        format='%Y%m',
    )
    agg_df = agg_df.sort_values(by=['Region', 'Itemtype', 'FY_Month_dt'])

    grp = agg_df.groupby(['Region', 'Itemtype'], observed=True)
    agg_df['SalesVolume_pct_change'] = grp['SalesVolume'].pct_change().round(3) * 100
    agg_df['UnitPrice_pct_change'] = grp['UnitPrice'].pct_change().round(3) * 100
    agg_df['PE_Coeff'] = (agg_df['SalesVolume_pct_change']
                          / agg_df['UnitPrice_pct_change']).round(2)

    # Exclude boundary years and clip extreme PE outliers for readability.
    agg_df_filtered = agg_df[~agg_df['FY'].astype(str).str.contains('2020|2021|2025')]
    agg_df_filtered = agg_df_filtered.dropna(subset=['PE_Coeff'])
    agg_df_filtered = agg_df_filtered[(agg_df_filtered['PE_Coeff'] < 1000)
                                      & (agg_df_filtered['PE_Coeff'] > -1000)]

    fig = go.Figure()
    for region in selected_regions:
        region_df = agg_df_filtered[agg_df_filtered['Region'] == region]
        fig.add_trace(go.Scatter(
            x=region_df['FY_Month_dt'],  # datetime axis -> correct ordering
            y=region_df['PE_Coeff'],
            mode='lines+markers',
            name=region,
            line=dict(width=2),
            marker=dict(size=6),
        ))
    fig.update_layout(
        title=f"Price Elasticity Coefficient (PE) by Year-Month for {selected_item_type}",
        xaxis_title="Fiscal Year_Month",
        yaxis_title="Price Elasticity Coefficient (PE)",
        height=600,
        width=1000,
        legend_title="Region",
        xaxis=dict(tickformat='%Y-%m'),  # Year-Month tick labels
    )
    st.plotly_chart(fig, use_container_width=True)