import streamlit as st
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib.dates as mdates
import plotly.express as px
import plotly.graph_objects as go
import re
from datetime import datetime, timedelta
import warnings
import time
import dask.dataframe as dd
state_to_region = {
# WEST
'AK': 'WEST', 'CA': 'WEST', 'CO': 'WEST', 'HI': 'WEST', 'ID': 'WEST',
'MT': 'WEST', 'NV': 'WEST', 'OR': 'WEST', 'UT': 'WEST', 'WA': 'WEST', 'WY': 'WEST',
# SOUTHWEST
'AZ': 'SOUTHWEST', 'NM': 'SOUTHWEST', 'OK': 'SOUTHWEST', 'TX': 'SOUTHWEST',
# MIDWEST
'IL': 'MIDWEST', 'IN': 'MIDWEST', 'IA': 'MIDWEST', 'KS': 'MIDWEST', 'MI': 'MIDWEST',
'MN': 'MIDWEST', 'MO': 'MIDWEST', 'NE': 'MIDWEST', 'ND': 'MIDWEST', 'OH': 'MIDWEST',
'SD': 'MIDWEST', 'WI': 'MIDWEST',
# SOUTHEAST
'AL': 'SOUTHEAST', 'AR': 'SOUTHEAST', 'DE': 'SOUTHEAST', 'FL': 'SOUTHEAST',
'GA': 'SOUTHEAST', 'KY': 'SOUTHEAST', 'LA': 'SOUTHEAST', 'MD': 'SOUTHEAST',
'MS': 'SOUTHEAST', 'NC': 'SOUTHEAST', 'SC': 'SOUTHEAST', 'TN': 'SOUTHEAST',
'VA': 'SOUTHEAST', 'WV': 'SOUTHEAST',
# NORTHEAST
'CT': 'NORTHEAST', 'ME': 'NORTHEAST', 'MA': 'NORTHEAST', 'NH': 'NORTHEAST',
'NJ': 'NORTHEAST', 'NY': 'NORTHEAST', 'PA': 'NORTHEAST', 'RI': 'NORTHEAST',
'VT': 'NORTHEAST'
}
@st.cache_data
def date_from_week(year, week):
# Assuming the fiscal year starts in August and the week starts from August 1st
base_date = pd.to_datetime((year - 1).astype(str) + '-08-01')
dates = base_date + pd.to_timedelta((week - 1) * 7, unit='days')
return dates
@st.cache_data
def load_data(active_card):
# st.write(f"{active_card}")
# Define columns common to multiple cards if there are any
common_cols = ['FyWeek', 'Itemtype', 'Chaincode', 'State', 'SalesVolume', 'UnitPrice', 'Sales']
# Columns specific to cards
card_specific_cols = {
'card1': ['FyWeek', 'Fy', 'State','Store','Address','Zipcode','City','Itemtype', 'Chaincode', 'Containercode', 'SalesVolume', 'UnitPrice', 'Sales'],
# 'card2': ['FyWeek', 'Fy', 'State','Store','Address','Zipcode','City','Itemtype', 'Chaincode', 'Containercode', 'SalesVolume', 'UnitPrice', 'Sales'],
'card3': ['FyWeek', 'Fy', 'State','Store','Address','Zipcode','City','Itemtype', 'Chaincode', 'Containercode', 'SalesVolume', 'UnitPrice', 'Sales'] # Added for PE calculation card
}
# Choose columns based on the active card
required_columns = card_specific_cols.get(active_card, common_cols)
# Define the data types for efficient memory usage
dtype_spec = {
'FyWeek': 'string',
'Fy': 'category', # Add data type for 'Fy' if it's used
'Itemtype': 'category',
'Chaincode': 'category',
'State': 'category',
"Store": "category",
'Containercode': 'category',
"Address": "string",
"Zipcode": "float",
"City": "category",
'SalesVolume': 'float',
'UnitPrice': 'float',
'Sales': 'float'
}
# Read only the necessary columns
# st.write(required_columns)
ddf = dd.read_csv("fy21-24.csv", usecols=required_columns, dtype=dtype_spec)
df = ddf.compute()
# st.write("+++++++++++++++++++++++")
if active_card in ['card1','card2', 'card3',]:
df = df.groupby(['FyWeek', 'Fy', 'Chaincode', 'Store', 'Address', 'Zipcode', 'City', 'State', 'Containercode', 'Itemtype'], observed=True).agg({
'SalesVolume': 'sum',
'UnitPrice': 'mean',
'Sales': 'sum'
}).reset_index()
df[['FY', 'Week']] = df['FyWeek'].str.split(' Week ', expand=True)
df['Week'] = df['Week'].astype(int) # Convert 'Week' to int
df['Year'] = df['FY'].str[2:].astype(int) # Extract year part and convert to int
df['Dt'] = date_from_week(df['Year'], df['Week'])
# Add the region column based on state
df['Region'] = df['State'].map(state_to_region)
return df
# Display logo
st.image("bonnie.png", width=150) # Adjust width as needed
# Display title
# st.title("Price vs. Sales Volume Tracker Dashboard")
# Initialize session state for storing which card was clicked and item type
if 'active_card' not in st.session_state:
st.session_state['active_card'] = None
if 'selected_item_type' not in st.session_state:
st.session_state['selected_item_type'] = 'CORE' # Set default to 'CORE'
if 'selected_feature' not in st.session_state:
st.session_state['selected_feature'] = 'Chaincode' # Default to 'Chain Code'
# Card selection buttons with logic to reset session state on switch
col1, col3 = st.columns(2)
with col1:
if st.button("Sales Volume Trend"):
st.session_state['active_card'] = 'card1'
# Reset other selections when switching cards
st.session_state['selected_state'] = None
st.session_state['selected_chaincode'] = None
st.session_state['selected_itemtype'] = None
st.session_state['selected_containercode'] = None
# with col2:
# if st.button("Sales Volume vs Median Unit Price Trend"):
# st.session_state['active_card'] = 'card2'
# # Reset selections when switching cards
# st.session_state['selected_state'] = None
# st.session_state['selected_chaincode'] = None
# st.session_state['selected_itemtype'] = None
# st.session_state['selected_containercode'] = None
with col3:
if st.button("Price Elasticity Coefficient Trend YoY"):
st.session_state['active_card'] = 'card3'
# Reset selections when switching cards
st.session_state['selected_state'] = None
st.session_state['selected_chaincode'] = None
st.session_state['selected_itemtype'] = None
st.session_state['selected_containercode'] = None
# Load data for the current card
start_time = time.time()
df = load_data(st.session_state['active_card'])
time_taken = time.time() - start_time
st.write(f"Data loaded in {time_taken:.2f} seconds")
############################################ CARD #1 ####################################################
if st.session_state['active_card'] == 'card1':
# Step 1: Sales Volume vs FyWeek for the whole dataset (no filter)
st.subheader("Total Sales Volume by Fiscal Week")
df['FY_Week'] = df['FY'].astype(str) + '_' + df['Week'].astype(str)
# Split FY_Week again for correct sorting
if not df.empty and 'FY_Week' in df.columns:
total_sales_df = df.groupby('FY_Week', observed=True)['SalesVolume'].sum().reset_index()
total_sales_df[['FY', 'Week']] = total_sales_df['FY_Week'].str.split('_', expand=True)
total_sales_df['Week'] = total_sales_df['Week'].astype(int)
total_sales_df = total_sales_df.sort_values(by=['FY', 'Week'])
# Create a line chart using Plotly
fig = px.line(total_sales_df, x='FY_Week', y='SalesVolume',
labels={'SalesVolume': 'Sales Volume', 'FY_Week': 'Fiscal Week'})
st.plotly_chart(fig)
# Step 2: Top 3 states based on sales volume as buttons/cards
top_states = df.groupby('State', observed=True)['SalesVolume'].sum().nlargest(3).index
st.write("### Top 3 Selling States in the last 4 years (drill down by state)")
col1, col2, col3 = st.columns(3)
if len(top_states) > 0 and col1.button(top_states[0]):
st.session_state['selected_state'] = top_states[0]
if len(top_states) > 1 and col2.button(top_states[1]):
st.session_state['selected_state'] = top_states[1]
if len(top_states) > 2 and col3.button(top_states[2]):
st.session_state['selected_state'] = top_states[2]
# If a state is selected, show the corresponding plot
if 'selected_state' in st.session_state and st.session_state['selected_state']:
selected_state = st.session_state['selected_state']
# Step 3: Sales volume vs FyWeek for the selected state
st.subheader(f"Sales Volume by Fiscal Week for {selected_state} (drill down by Chaincode) ")
state_sales_df = df[df['State'] == selected_state].groupby('FY_Week', observed=True)['SalesVolume'].sum().reset_index()
if not state_sales_df.empty and 'FY_Week' in state_sales_df.columns:
state_sales_df[['FY', 'Week']] = state_sales_df['FY_Week'].str.split('_', expand=True)
state_sales_df['Week'] = state_sales_df['Week'].astype(int)
state_sales_df = state_sales_df.sort_values(by=['FY', 'Week'])
fig = px.line(state_sales_df, x='FY_Week', y='SalesVolume',
labels={'SalesVolume': 'Sales Volume', 'FY_Week': 'Fiscal Week'})
st.plotly_chart(fig)
# Step 4: Top 3 chaincodes based on sales volume as buttons/cards
top_chaincodes = df[df['State'] == selected_state].groupby('Chaincode', observed=True)['SalesVolume'].sum().nlargest(3).index
st.write(f"### Top 3 selling Chaincode in {selected_state}:")
# Add a check to ensure top_chaincodes has values before accessing
col1, col2, col3 = st.columns(3)
if len(top_chaincodes) > 0 and col1.button(top_chaincodes[0]):
st.session_state['selected_chaincode'] = top_chaincodes[0]
if len(top_chaincodes) > 1 and col2.button(top_chaincodes[1]):
st.session_state['selected_chaincode'] = top_chaincodes[1]
if len(top_chaincodes) > 2 and col3.button(top_chaincodes[2]):
st.session_state['selected_chaincode'] = top_chaincodes[2]
# If a chaincode is selected, show the corresponding plot
if 'selected_chaincode' in st.session_state:
selected_chaincode = st.session_state['selected_chaincode']
# Step 5: Sales volume vs FyWeek for the selected chaincode in the selected state
st.subheader(f"Sales Volume by Fiscal Week for {selected_chaincode} in {selected_state}")
chain_sales_df = df[(df['State'] == selected_state) & (df['Chaincode'] == selected_chaincode)].groupby('FY_Week', observed=True)['SalesVolume'].sum().reset_index()
if not chain_sales_df.empty and 'FY_Week' in chain_sales_df.columns:
chain_sales_df[['FY', 'Week']] = chain_sales_df['FY_Week'].str.split('_', expand=True)
chain_sales_df['Week'] = chain_sales_df['Week'].astype(int)
chain_sales_df = chain_sales_df.sort_values(by=['FY', 'Week'])
fig = px.line(chain_sales_df, x='FY_Week', y='SalesVolume',
# title=f'Sales Volume vs Fiscal Week in {selected_chaincode}, {selected_state}',
labels={'SalesVolume': 'Sales Volume', 'FY_Week': 'Fiscal Week'})
st.plotly_chart(fig)
# Step 6: Top 3 itemtypes based on sales volume as buttons/cards
top_itemtypes = df[(df['State'] == selected_state) & (df['Chaincode'] == selected_chaincode)].groupby('Itemtype', observed=True)['SalesVolume'].sum().nlargest(3).index
st.write(f"### Top Item Type in {selected_chaincode}, {selected_state} (drill down by ItemType) :")
col1, col2, col3 = st.columns(3)
if len(top_itemtypes) > 0 and col1.button(top_itemtypes[0]):
st.session_state['selected_itemtype'] = top_itemtypes[0]
if len(top_itemtypes) > 1 and col2.button(top_itemtypes[1]):
st.session_state['selected_itemtype'] = top_itemtypes[1]
if len(top_itemtypes) > 2 and col3.button(top_itemtypes[2]):
st.session_state['selected_itemtype'] = top_itemtypes[2]
# If an itemtype is selected, show the corresponding dual-axis plot for Sales Volume & Unit Price
if 'selected_itemtype' in st.session_state:
selected_itemtype = st.session_state['selected_itemtype']
# Step 7: Dual-axis plot for Sales volume and UnitPrice vs FyWeek for the selected itemtype
# st.subheader(f"Sales Volume & Unit Price vs Fiscal Week for {selected_itemtype} in {selected_chaincode}, {selected_state}")
item_sales_df = df[(df['State'] == selected_state) & (df['Chaincode'] == selected_chaincode) & (df['Itemtype'] == selected_itemtype)].groupby('FY_Week', observed=True).agg({
'SalesVolume': 'sum',
'UnitPrice': 'mean'
}).reset_index()
if not item_sales_df.empty and 'FY_Week' in item_sales_df.columns:
item_sales_df[['FY', 'Week']] = item_sales_df['FY_Week'].str.split('_', expand=True)
item_sales_df['Week'] = item_sales_df['Week'].astype(int)
item_sales_df = item_sales_df.sort_values(by=['FY', 'Week'])
# Dual-axis plot using Plotly Graph Objects
fig = go.Figure()
# Add SalesVolume trace
fig.add_trace(go.Scatter(
x=item_sales_df['FY_Week'],
y=item_sales_df['SalesVolume'],
mode='lines+markers',
name='SalesVolume',
line=dict(color='blue'),
hovertemplate='SalesVolume: %{y}
Week-Year: %{x}'
))
# Add UnitPrice trace with secondary Y-axis
fig.add_trace(go.Scatter(
x=item_sales_df['FY_Week'],
y=item_sales_df['UnitPrice'],
mode='lines+markers',
name='UnitPrice',
line=dict(color='green'),
yaxis='y2',
hovertemplate='UnitPrice: %{y}
Week-Year: %{x}'
))
# Update layout for dual axes
fig.update_layout(
title=f"Sales Volume vs Unit Price by Fiscal Week for {selected_itemtype}, {selected_chaincode}, {selected_state}",
xaxis_title='Fiscal Week',
yaxis_title='Sales Volume',
yaxis2=dict(title='Unit Price', overlaying='y', side='right'),
legend=dict(x=0.9, y=1.15),
hovermode="x unified", # Show both values in a tooltip
height=600,
margin=dict(l=50, r=50, t=50, b=50)
)
# Rotate X-axis labels
fig.update_xaxes(tickangle=90)
# Display the Plotly figure in Streamlit
st.plotly_chart(fig, use_container_width=True)
# Step 8: Display Top/Bottom Container Codes and Stores
st.subheader("Top & Bottom 3 Container Codes and Stores")
# Get top and bottom 3 container codes based on SalesVolume
top_containercodes = df[(df['State'] == selected_state) & (df['Chaincode'] == selected_chaincode) & (df['Itemtype'] == selected_itemtype)].groupby('Containercode', observed=True)['SalesVolume'].sum().nlargest(3).reset_index()
bottom_containercodes = df[(df['State'] == selected_state) & (df['Chaincode'] == selected_chaincode) & (df['Itemtype'] == selected_itemtype)].groupby('Containercode', observed=True)['SalesVolume'].sum().nsmallest(3).reset_index()
# Get top and bottom 3 stores based on SalesVolume
top_stores = df[(df['State'] == selected_state) & (df['Chaincode'] == selected_chaincode) & (df['Itemtype'] == selected_itemtype)].groupby('Store', observed=True)['SalesVolume'].sum().nlargest(3).reset_index()
bottom_stores = df[(df['State'] == selected_state) & (df['Chaincode'] == selected_chaincode) & (df['Itemtype'] == selected_itemtype)].groupby('Store', observed=True)['SalesVolume'].sum().nsmallest(3).reset_index()
# Display top and bottom container codes side by side
st.write("### Container Codes:")
col1, col2 = st.columns(2)
with col1:
st.write("#### Top 3 Container Codes")
st.dataframe(top_containercodes)
with col2:
st.write("#### Bottom 3 Container Codes")
st.dataframe(bottom_containercodes)
# Display top and bottom stores side by side
st.write("### Stores:")
col3, col4 = st.columns(2)
with col3:
st.write("#### Top 3 Stores")
st.dataframe(top_stores)
with col4:
st.write("#### Bottom 3 Stores")
st.dataframe(bottom_stores)
##########################################################################################################
########################################### CARD #2 ####################################################
# if st.session_state['active_card'] == 'card2':
# # Identify the top 10 Itemtypes based on total SalesVolume
# top_10_itemtypes = df.groupby('Itemtype')['SalesVolume'].sum().nlargest(10).index
# # Filter the DataFrame to include only the top 10 Itemtypes
# df = df[df['Itemtype'].isin(top_10_itemtypes)]
# # Dropdown to select item type (using session_state)
# st.session_state['selected_item_type'] = st.selectbox(
# 'Select Item Type', df['Itemtype'].unique(),
# index=list(df['Itemtype'].unique()).index(st.session_state['selected_item_type']))
# # Dropdown to select the grouping category (container code, chain code, or state)
# group_by_option = st.selectbox('Group by', ['Containercode', 'Chaincode', 'State','Region'])
# # Multi-select checkbox to select multiple years
# selected_years = st.multiselect('Select Year(s)', [2021, 2022, 2023, 2024], default=[2021])
# st.subheader(f"Sales Volume & Unit Price Correlation for {group_by_option} in {', '.join(map(str, selected_years))}")
# # Convert 'Dt' column to datetime
# df['Dt'] = pd.to_datetime(df['Dt'], errors='coerce')
# df['Promo'] = np.where(df['Dt'].dt.month.astype(str).isin(['3', '4', '5', '6']), 'Promo', 'NoPromo')
# df["Promo"] = df["Promo"].astype("category")
# # Filter the dataframe based on the selected item type and selected years
# filtered_df = df[(df['Itemtype'] == st.session_state['selected_item_type']) & (df['Dt'].dt.year.isin(selected_years))]
# # Find the top 3 values based on total SalesVolume in the selected grouping category
# top_3_values = filtered_df.groupby(group_by_option, observed=True)['SalesVolume'].sum().nlargest(3).index
# # Filter the data for only the top 3 values
# top_group_data = filtered_df[filtered_df[group_by_option].isin(top_3_values)]
# # Aggregate data
# agg_df = top_group_data.groupby([group_by_option, 'Year', 'Week', 'Dt'], observed=True).agg({
# 'SalesVolume': 'sum',
# 'UnitPrice': 'mean'
# }).reset_index()
# # Create a new column 'week-year' for X-axis labels
# agg_df['week-year'] = agg_df['Dt'].dt.strftime('%U-%Y')
# # Loop through the top 3 values and create separate plots using Plotly
# for value in top_3_values:
# value_data = agg_df[agg_df[group_by_option] == value]
# # Assuming you have 'value_data' from your previous code
# mean_sales_volume = value_data['SalesVolume'].mean()
# mean_unit_price = value_data['UnitPrice'].mean()
# # Create a Plotly figure
# fig = go.Figure()
# # Add SalesVolume trace
# fig.add_trace(go.Scatter(
# x=value_data['week-year'],
# y=value_data['SalesVolume'],
# mode='lines+markers',
# name='SalesVolume',
# line=dict(color='blue'),
# hovertemplate='SalesVolume: %{y}
Week-Year: %{x}'
# ))
# # Add UnitPrice trace on a secondary Y-axis
# fig.add_trace(go.Scatter(
# x=value_data['week-year'],
# y=value_data['UnitPrice'],
# mode='lines+markers',
# name='UnitPrice',
# line=dict(color='green'),
# yaxis='y2',
# hovertemplate='UnitPrice: %{y}
Week-Year: %{x}'
# ))
# # Add mean line for SalesVolume
# fig.add_shape(type="line",
# x0=value_data['week-year'].min(), x1=value_data['week-year'].max(),
# y0=mean_sales_volume, y1=mean_sales_volume,
# line=dict(color="blue", width=2, dash="dash"),
# xref='x', yref='y')
# # Add mean line for UnitPrice (on secondary Y-axis)
# fig.add_shape(type="line",
# x0=value_data['week-year'].min(), x1=value_data['week-year'].max(),
# y0=mean_unit_price, y1=mean_unit_price,
# line=dict(color="green", width=2, dash="dash"),
# xref='x', yref='y2')
# # Update layout for dual axes
# fig.update_layout(
# template='plotly_white',
# title=f"SalesVolume and UnitPrice - {value} ({group_by_option})",
# xaxis_title='Week-Year',
# yaxis_title='Sales Volume',
# yaxis2=dict(title='UnitPrice', overlaying='y', side='right'),
# legend=dict(x=0.9, y=1.15),
# hovermode="x unified", # Show both values in a tooltip
# height=600,
# margin=dict(l=50, r=50, t=50, b=50)
# )
# # Rotate X-axis labels
# fig.update_xaxes(tickangle=90)
# # Display the Plotly figure in Streamlit
# st.plotly_chart(fig, use_container_width=True)
################################
if st.session_state['active_card'] == 'card3':
# Dropdown for selecting the item type
item_type_options = df['Itemtype'].unique()
selected_item_type = st.selectbox("Select Item Type", item_type_options)
# Dropdown for selecting the region (multiple selection allowed)
region_options = df['Region'].dropna().unique()
selected_regions = st.multiselect("Select Region(s)", region_options, default=region_options)
# Filter data based on selected item type and selected regions
filtered_df = df[(df['Itemtype'] == selected_item_type) & (df['Region'].isin(selected_regions))]
# Group by Year, Region, Itemtype and Promo, and aggregate SalesVolume and UnitPrice
agg_df = filtered_df.groupby(['Fy', 'Region', 'Itemtype',]).agg({
'SalesVolume': 'sum',
'UnitPrice': 'mean'
}).reset_index()
# Sort values by Region, Itemtype, Fy, and Promo for YOY calculation
agg_df = agg_df.sort_values(by=['Region', 'Itemtype', 'Fy',])
# Calculate YOY percentage changes in Sales Volume and Unit Price
agg_df['SalesVolume_pct_change'] = agg_df.groupby(['Region', 'Itemtype',])['SalesVolume'].pct_change().round(3) * 100
agg_df['UnitPrice_pct_change'] = agg_df.groupby(['Region', 'Itemtype', ])['UnitPrice'].pct_change().round(3) * 100
# Calculate Price Elasticity Coefficient (PE)
agg_df['PE_Coeff'] = (agg_df['SalesVolume_pct_change'] / agg_df['UnitPrice_pct_change']).round(2)
# Exclude FY 2025 but keep FY 2021 even with NaN values
agg_df_filtered = agg_df[agg_df['Fy'] != 'FY 2025']
# Drop rows where PE_Coeff is NaN (optional)
agg_df_filtered = agg_df_filtered.dropna(subset=['PE_Coeff'])
agg_df_filtered = agg_df_filtered.rename(columns={
'SalesVolume_pct_change': 'SlVol%change',
'UnitPrice_pct_change': 'UnPr%change',
})
agg_df_filtered = agg_df_filtered.reset_index(drop=True)
st.dataframe(agg_df_filtered)
st.write(agg_df_filtered.shape)
# Extract values for the current and previous years from row 1 and row 2 of the dataframe
current_year_row = agg_df_filtered.iloc[1] # Row 1 - Current Year
previous_year_row = agg_df_filtered.iloc[0] # Row 2 - Previous Year
# Extract values for Unit Price and Sales Volume
unit_price_current_year = current_year_row['UnitPrice']
unit_price_previous_year = previous_year_row['UnitPrice']
sales_volume_current_year = current_year_row['SalesVolume']
sales_volume_previous_year = previous_year_row['SalesVolume']
# Calculate percentage changes for Unit Price and Sales Volume
unit_price_pct = ((unit_price_current_year - unit_price_previous_year) / unit_price_previous_year) * 100
sales_volume_pct = ((sales_volume_current_year - sales_volume_previous_year) / sales_volume_previous_year) * 100
# Calculate PE Coefficient
pe_coeff = sales_volume_pct / unit_price_pct
st.markdown(f'''### Calculations for Price Elasticity Coefficient''')
st.latex(rf"""
\text{{Unit Price \% Change}} = \frac{{{unit_price_current_year:.2f} - {unit_price_previous_year:.2f}}}{{{unit_price_previous_year:.2f}}} \times 100 = {unit_price_pct:.2f}\%
""")
# Sales Volume % Change
st.latex(rf"""
\text{{Sales Volume \% Change}} = \frac{{{sales_volume_current_year:.2f} - {sales_volume_previous_year:.2f}}}{{{sales_volume_previous_year:.2f}}} \times 100 = {sales_volume_pct:.2f}\%
""")
# PE Coefficient
st.latex(rf"""
\text{{PE Coefficient}} = \frac{{{sales_volume_pct:.2f}}}{{{unit_price_pct:.2f}}} = {pe_coeff:.2f}
""")
# Explanation for PE Coefficient Conditions
st.markdown(f"""
### Interpretation of Price Elasticity (PE) Coefficient:
The Price Elasticity (PE) coefficient reflects how sensitive sales volume is to changes in unit price.
- If the **PE coefficient is positive**:
1. When the price increases, sales volume increases.
2. When the price decreases, sales volume decreases.
- If the **PE coefficient is negative**:
1. When the price increases, sales volume decreases.
2. When the price decreases, sales volume increases.
""")
# Dynamic analysis based on the calculated PE coefficient and signs of changes
if unit_price_pct > 0 and sales_volume_pct > 0:
st.warning(f"""
Both unit price and sales volume increased (refer first and second row of the table). The PE coefficient of **{pe_coeff:.2f}** indicates that for every 1% increase in unit price, sales volume increased by approximately **{pe_coeff:.2f}%**.
""")
elif unit_price_pct < 0 and sales_volume_pct < 0:
st.warning(f"""
Both unit price and sales volume decreased (refer first and second row of the table). The PE coefficient of **{pe_coeff:.2f}** suggests that for every 1% decrease in unit price, sales volume decreased by approximately **{pe_coeff:.2f}%**.
""")
elif unit_price_pct > 0 and sales_volume_pct < 0:
st.warning(f"""
The unit price increased while sales volume decreased (refer first and second row of the table). The negative PE coefficient of **{pe_coeff:.2f}** means that for every 1% increase in unit price, sales volume fell by approximately **{abs(pe_coeff):.2f}%**.
""")
elif unit_price_pct < 0 and sales_volume_pct > 0:
st.warning(f"""
The unit price decreased while sales volume increased (refer first and second row of the table). The negative PE coefficient of **{pe_coeff:.2f}** implies that for every 1% decrease in unit price, sales volume increased by approximately **{abs(pe_coeff):.2f}%**.
""")
# Plot the PE Coefficient with Plotly
fig = px.line(
agg_df_filtered,
x='Fy',
y='PE_Coeff', # Differentiate between Promo and NoPromo
color='Region', # Differentiate lines by Region
title=f"Price Elasticity Coefficient (PE) by Year for {selected_item_type}",
labels={'Fy': 'Fiscal Year', 'PE_Coeff': 'Price Elasticity Coefficient'},
markers=True
)
# Customize layout and show plot
fig.update_layout(
height=600,
width=1000,
)
st.plotly_chart(fig, use_container_width=True)
#################### CARD-3 MONTHLY IMPLEMENTATION #########################
# Ensure 'Dt' column is in datetime format
df['Dt'] = pd.to_datetime(df['Dt'])
# Extract fiscal year and month from 'Dt' column
df['FY'] = df['Dt'].dt.year.astype(str)
df['Month'] = df['Dt'].dt.month.astype(str)
# Create FY_Month column
df['FY_Month'] = df['FY'] + '_' + df['Month']
# Filter data based on selected item type and selected regions
filtered_df = df[(df['Itemtype'] == selected_item_type) & (df['Region'].isin(selected_regions))]
# Group by Year, Region, Itemtype and aggregate SalesVolume and UnitPrice
agg_df = filtered_df.groupby(['FY_Month', 'Region', 'Itemtype']).agg({
'SalesVolume': 'sum',
'UnitPrice': 'mean'
}).reset_index()
# Split FY_Month again for correct sorting
agg_df[['FY', 'Month']] = agg_df['FY_Month'].str.split('_', expand=True)
agg_df['Month'] = agg_df['Month'].astype(int)
agg_df['FY'] = agg_df['FY'].astype(int)
# Combine FY and Month back into a datetime-like format for proper sorting
agg_df['FY_Month_dt'] = pd.to_datetime(agg_df['FY'].astype(str) + agg_df['Month'].astype(str).str.zfill(2), format='%Y%m')
# Sort values by Region, Itemtype, and FY_Month_dt
agg_df = agg_df.sort_values(by=['Region', 'Itemtype', 'FY_Month_dt'])
# Calculate YOY percentage changes in Sales Volume and Unit Price
agg_df['SalesVolume_pct_change'] = agg_df.groupby(['Region', 'Itemtype'])['SalesVolume'].pct_change().round(3) * 100
agg_df['UnitPrice_pct_change'] = agg_df.groupby(['Region', 'Itemtype'])['UnitPrice'].pct_change().round(3) * 100
# Calculate Price Elasticity Coefficient (PE)
agg_df['PE_Coeff'] = (agg_df['SalesVolume_pct_change'] / agg_df['UnitPrice_pct_change']).round(2)
# Exclude FY 2021 and FY 2025
agg_df_filtered = agg_df[~agg_df['FY'].astype(str).str.contains('2020|2021|2025')]
# Drop rows where PE_Coeff is NaN (optional)
agg_df_filtered = agg_df_filtered.dropna(subset=['PE_Coeff'])
agg_df_filtered = agg_df_filtered[(agg_df_filtered['PE_Coeff'] < 1000) & (agg_df_filtered['PE_Coeff'] > -1000)]
# Plot the PE Coefficient with Plotly
fig = go.Figure()
# Iterate through each selected region and plot separately
for region in selected_regions:
# Filter the DataFrame for the current region
region_df = agg_df_filtered[agg_df_filtered['Region'] == region]
# Add a line trace for the region
fig.add_trace(go.Scatter(
x=region_df['FY_Month_dt'], # Use the datetime-like column for correct sorting
y=region_df['PE_Coeff'],
mode='lines+markers',
name=region, # Set the name to the region to appear in the legend
line=dict(width=2),
marker=dict(size=6),
))
# Customize layout
fig.update_layout(
title=f"Price Elasticity Coefficient (PE) by Year-Month for {selected_item_type}",
xaxis_title="Fiscal Year_Month",
yaxis_title="Price Elasticity Coefficient (PE)",
height=600,
width=1000,
legend_title="Region",
xaxis=dict(
tickformat='%Y-%m', # Format X-axis ticks as Year-Month
)
)
# Show the plot in Streamlit
st.plotly_chart(fig, use_container_width=True)