# Streamlit dashboard: Bonnie Plants pricing & sales volume analytics.
# (Header reconstructed; original lines were web-scraping residue.)
import streamlit as st | |
import pandas as pd | |
import numpy as np | |
import matplotlib.pyplot as plt | |
import seaborn as sns | |
import matplotlib.dates as mdates | |
import plotly.express as px | |
import plotly.graph_objects as go | |
import re | |
from datetime import datetime, timedelta | |
import warnings | |
import time | |
import dask.dataframe as dd | |
# Region membership table: each sales region and its U.S. state postal codes.
_REGION_STATES = {
    'WEST': ['AK', 'CA', 'CO', 'HI', 'ID', 'MT', 'NV', 'OR', 'UT', 'WA', 'WY'],
    'SOUTHWEST': ['AZ', 'NM', 'OK', 'TX'],
    'MIDWEST': ['IL', 'IN', 'IA', 'KS', 'MI', 'MN', 'MO', 'NE', 'ND', 'OH',
                'SD', 'WI'],
    'SOUTHEAST': ['AL', 'AR', 'DE', 'FL', 'GA', 'KY', 'LA', 'MD', 'MS', 'NC',
                  'SC', 'TN', 'VA', 'WV'],
    'NORTHEAST': ['CT', 'ME', 'MA', 'NH', 'NJ', 'NY', 'PA', 'RI', 'VT'],
}

# Flat lookup used by load_data(): state postal code -> region name.
state_to_region = {
    state: region
    for region, states in _REGION_STATES.items()
    for state in states
}
def date_from_week(year, week):
    """Convert fiscal (year, week) to a calendar date.

    The fiscal year is assumed to start on August 1 of the *previous*
    calendar year, with week 1 beginning on that date and each week
    lasting 7 days.

    Args:
        year: Fiscal year(s) — an int scalar or a pandas Series of ints.
        week: Fiscal week number(s), 1-based — scalar or Series.

    Returns:
        pd.Timestamp for scalar input, or a Series of Timestamps for
        vectorized (Series) input.
    """
    if np.isscalar(year):
        # Scalar fast path. The original `(year - 1).astype(str)` only
        # worked for Series/array inputs and raised AttributeError for
        # plain ints; this branch generalizes the helper to scalars.
        base = pd.Timestamp(year=int(year) - 1, month=8, day=1)
        return base + pd.Timedelta(days=(int(week) - 1) * 7)
    # Vectorized path (unchanged behavior): build 'YYYY-08-01' strings
    # for each row and offset by whole weeks.
    base_date = pd.to_datetime((year - 1).astype(str) + '-08-01')
    return base_date + pd.to_timedelta((week - 1) * 7, unit='days')
def load_data(active_card):
    """Load the sales CSV and prepare it for the selected dashboard card.

    Reads only the columns the active card needs (keeps memory down via
    dask + explicit dtypes), pre-aggregates store-level rows for
    card2/card3, then derives fiscal FY/Week/Year/Dt and Region columns.

    Args:
        active_card: 'card1', 'card2', or 'card3'. Any other value
            (including None, before a card is clicked) falls back to the
            common column set.

    Returns:
        A pandas DataFrame with FyWeek/FY/Week/Year/Dt/Region plus the
        card-specific columns.
    """
    # st.write(f"{active_card}")
    # Fallback column set used when active_card is not a known card key.
    common_cols = ['FyWeek', 'Itemtype', 'Chaincode', 'State', 'SalesVolume', 'UnitPrice', 'Sales']
    # Columns each card actually needs; card2 is the widest (store-level detail).
    card_specific_cols = {
        'card1': ['FyWeek', 'State', 'Itemtype', 'Chaincode', 'SalesVolume'],
        'card2': ['FyWeek', 'Fy', 'State','Store','Address','Zipcode','City','Itemtype', 'Chaincode', 'Containercode', 'SalesVolume', 'UnitPrice', 'Sales'],
        'card3': ['FyWeek', 'Fy', 'State', 'Store', 'Itemtype', 'Chaincode', 'SalesVolume', 'UnitPrice', 'Sales']  # Added for PE calculation card
    }
    # Choose columns based on the active card.
    required_columns = card_specific_cols.get(active_card, common_cols)
    # Explicit dtypes for efficient memory usage; categoricals for the
    # low-cardinality columns. NOTE(review): Zipcode as float drops
    # leading zeros — presumably tolerated downstream; confirm.
    dtype_spec = {
        'FyWeek': 'string',
        'Fy': 'category',  # Fiscal-year label, e.g. 'FY 2022'
        'Itemtype': 'category',
        'Chaincode': 'category',
        'State': 'category',
        "Store": "category",
        'Containercode': 'category',
        "Address": "string",
        "Zipcode": "float",
        "City": "category",
        'SalesVolume': 'float',
        'UnitPrice': 'float',
        'Sales': 'float'
    }
    # Read only the necessary columns, then materialize to pandas.
    # st.write(required_columns)
    ddf = dd.read_csv("fy21-24.csv", usecols=required_columns, dtype=dtype_spec)
    df = ddf.compute()
    if active_card in ['card2','card3']:
        # Collapse duplicate rows per store/item/week. NOTE(review):
        # card3's column list above lacks Address/Zipcode/City/Containercode,
        # so this groupby would raise KeyError for card3 — confirm the
        # intended key set for that card.
        df = df.groupby(['FyWeek', 'Fy', 'Chaincode', 'Store', 'Address', 'Zipcode', 'City', 'State', 'Containercode', 'Itemtype'], observed=True).agg({
            'SalesVolume': 'sum',
            'UnitPrice': 'mean',  # mean unit price across collapsed rows
            'Sales': 'sum'
        }).reset_index()
    # Derive fiscal fields: 'FY 2022 Week 5' -> FY='FY 2022', Week=5.
    df[['FY', 'Week']] = df['FyWeek'].str.split(' Week ', expand=True)
    df['Week'] = df['Week'].astype(int)  # Convert 'Week' to int
    df['Year'] = df['FY'].str[2:].astype(int)  # Extract year part and convert to int
    # Calendar date for each fiscal week (fiscal year starts Aug 1).
    df['Dt'] = date_from_week(df['Year'], df['Week'])
    # Add the region column based on state; states missing from the map
    # (e.g. 'DC') become NaN.
    df['Region'] = df['State'].map(state_to_region)
    # st.write(df.columns)
    return df
# ---------------------------------------------------------------------------
# Page header and one-time session-state initialization.
# ---------------------------------------------------------------------------
# Display logo
st.image("bonnie.png", width=150)  # Adjust width as needed
# Display title
# st.title("Bonnie Plants Pricing & Sales Analytics Dashboard")
st.title("Price vs. Sales Volume Tracker Dashboard")
# Initialize session state for storing which card was clicked and item type.
# Streamlit reruns the whole script on every interaction, so these guards
# ensure defaults are set only once per session.
if 'active_card' not in st.session_state:
    st.session_state['active_card'] = None
if 'selected_item_type' not in st.session_state:
    st.session_state['selected_item_type'] = 'CORE'  # Set default to 'CORE'
if 'selected_feature' not in st.session_state:
    st.session_state['selected_feature'] = 'Chaincode'  # Default to 'Chain Code'
# Card selection buttons — clicking one sets the active card for this rerun.
col1, col2 ,col3= st.columns(3)
with col1:
    if st.button("Sales Volume Trend for Item Category"):
        st.session_state['active_card'] = 'card1'
with col2:
    if st.button("Sales Volume & Unit Price Correlation for Item Category and Container Code"):
        st.session_state['active_card'] = 'card2'
with col3:
    if st.button("PE Coefficient Calculation for Regions & Item Categories"):
        st.session_state['active_card'] = 'card3'
# Time the data load and report it. NOTE(review): load_data runs on every
# rerun with no @st.cache_data — presumably acceptable for this dataset
# size, but worth confirming.
start_time=time.time()
# st.write(st.session_state['active_card'])
df = load_data(st.session_state['active_card'])
time_taken = time.time() - start_time
st.write(f"Data loaded in {time_taken:.2f} seconds")
# Initialize session state for the selected state (default: first state seen).
if 'selected_state' not in st.session_state:
    st.session_state['selected_state'] = df['State'].unique()[0]
############################################ CARD #1 ####################################################
# Card 1: bar chart of weekly sales volume for one state, colored by the
# chosen grouping feature (chain code or item type).
if st.session_state['active_card'] == 'card1':
    # st.write("Processing card1...")
    # Dropdown for selecting the state
    selected_state = st.selectbox('Select State', df['State'].unique())
    # Dropdown for selecting the feature for grouping
    selected_feature = st.selectbox('Select Feature for Grouping', ['Chaincode', 'Itemtype',])
    # Filter the dataframe based on selected state
    filtered_df = df[df['State'] == selected_state]
    # Time the grouping operation. NOTE(review): time_taken is computed
    # but never displayed — presumably leftover instrumentation.
    start_time = time.time()
    group_data = filtered_df.groupby(['FyWeek', selected_feature],observed=True)['SalesVolume'].sum().reset_index()
    time_taken = time.time() - start_time
    # Stacked bar chart: one bar per fiscal week, segments per feature value.
    fig = px.bar(group_data, x='FyWeek', y='SalesVolume', color=selected_feature,
                 title=f'Sales Volume over Fiscal Week in {selected_state} by {selected_feature}',
                 labels={'SalesVolume': 'Sales Volume', 'Fiscal Week Short': 'Fiscal Week'})
    st.plotly_chart(fig)
##########################################################################################################
########################################### CARD #2 ####################################################
# Card 2: for the selected item type and year(s), plot weekly SalesVolume
# vs. mean UnitPrice (dual Y axes) for the top-3 groups by total volume.
if st.session_state['active_card'] == 'card2':
    # Dropdown to select item type (persisted across reruns via session_state)
    st.session_state['selected_item_type'] = st.selectbox(
        'Select Item Type', df['Itemtype'].unique(),
        index=list(df['Itemtype'].unique()).index(st.session_state['selected_item_type']))
    # Dropdown to select the grouping category (container code, chain code, or state)
    group_by_option = st.selectbox('Group by', ['Containercode', 'Chaincode', 'State'])
    # Multi-select checkbox to select multiple years
    selected_years = st.multiselect('Select Year(s)', [2021, 2022, 2023, 2024], default=[2021])
    st.subheader(f"Sales Volume & Unit Price Correlation for {group_by_option} in {', '.join(map(str, selected_years))}")
    # Ensure 'Dt' is datetime so .dt accessors work below.
    df['Dt'] = pd.to_datetime(df['Dt'], errors='coerce')
    # Flag March–June as the promo season. NOTE(review): 'Promo' is
    # computed here but not used further in this branch — confirm intent.
    df['Promo'] = np.where(df['Dt'].dt.month.astype(str).isin(['3', '4', '5', '6']), 'Promo', 'NoPromo')
    df["Promo"] = df["Promo"].astype("category")
    # Filter the dataframe based on the selected item type and selected years
    filtered_df = df[(df['Itemtype'] == st.session_state['selected_item_type']) & (df['Dt'].dt.year.isin(selected_years))]
    # Find the top 3 values based on total SalesVolume in the selected grouping category
    top_3_values = filtered_df.groupby(group_by_option, observed=True)['SalesVolume'].sum().nlargest(3).index
    # Filter the data for only the top 3 values
    top_group_data = filtered_df[filtered_df[group_by_option].isin(top_3_values)]
    # Aggregate to one row per group/week: total volume, mean unit price.
    agg_df = top_group_data.groupby([group_by_option, 'Year', 'Week', 'Dt'], observed=True).agg({
        'SalesVolume': 'sum',
        'UnitPrice': 'mean'
    }).reset_index()
    # Create a new column 'week-year' for X-axis labels (calendar week number)
    agg_df['week-year'] = agg_df['Dt'].dt.strftime('%U-%Y')
    # Loop through the top 3 values and create separate plots using Plotly
    for value in top_3_values:
        value_data = agg_df[agg_df[group_by_option] == value]
        # Means are drawn as dashed reference lines on each axis.
        mean_sales_volume = value_data['SalesVolume'].mean()
        mean_unit_price = value_data['UnitPrice'].mean()
        # Create a Plotly figure
        fig = go.Figure()
        # Add SalesVolume trace (left Y axis)
        fig.add_trace(go.Scatter(
            x=value_data['week-year'],
            y=value_data['SalesVolume'],
            mode='lines+markers',
            name='SalesVolume',
            line=dict(color='blue'),
            hovertemplate='SalesVolume: %{y}<br>Week-Year: %{x}'
        ))
        # Add UnitPrice trace on a secondary Y-axis (right)
        fig.add_trace(go.Scatter(
            x=value_data['week-year'],
            y=value_data['UnitPrice'],
            mode='lines+markers',
            name='UnitPrice',
            line=dict(color='green'),
            yaxis='y2',
            hovertemplate='UnitPrice: %{y}<br>Week-Year: %{x}'
        ))
        # Add mean line for SalesVolume
        fig.add_shape(type="line",
                      x0=value_data['week-year'].min(), x1=value_data['week-year'].max(),
                      y0=mean_sales_volume, y1=mean_sales_volume,
                      line=dict(color="blue", width=2, dash="dash"),
                      xref='x', yref='y')
        # Add mean line for UnitPrice (on secondary Y-axis)
        fig.add_shape(type="line",
                      x0=value_data['week-year'].min(), x1=value_data['week-year'].max(),
                      y0=mean_unit_price, y1=mean_unit_price,
                      line=dict(color="green", width=2, dash="dash"),
                      xref='x', yref='y2')
        # Update layout for dual axes
        fig.update_layout(
            template='plotly_white',
            title=f"SalesVolume and UnitPrice - {value} ({group_by_option})",
            xaxis_title='Week-Year',
            yaxis_title='Sales Volume',
            yaxis2=dict(title='UnitPrice', overlaying='y', side='right'),
            legend=dict(x=0.9, y=1.15),
            hovermode="x unified",  # Show both values in a tooltip
            height=400,
            autosize=False,
            margin=dict(l=1, r=1, t=1, b=1)
        )
        # Rotate X-axis labels
        fig.update_xaxes(tickangle=90)
        # Display the Plotly figure in Streamlit
        st.plotly_chart(fig, use_container_width=False)
##########################################################################################################
# Card 3: year-over-year price-elasticity coefficient (PE) per region for
# one item type: %ΔSalesVolume / %ΔUnitPrice across fiscal years.
if st.session_state['active_card'] == 'card3':
    # Dropdown for selecting the item type
    item_type_options = df['Itemtype'].unique()
    selected_item_type = st.selectbox("Select Item Type", item_type_options)
    # Dropdown for selecting the region (multiple selection allowed)
    region_options = df['Region'].unique()
    selected_regions = st.multiselect("Select Region(s)", region_options, default=region_options)
    # Filter data based on selected item type and selected regions
    filtered_df = df[(df['Itemtype'] == selected_item_type) & (df['Region'].isin(selected_regions))]
    # Aggregate per fiscal year / region / item type. NOTE(review): an
    # earlier version apparently also grouped by 'Promo' (see comments
    # below); that dimension is not present here — confirm it was
    # intentionally dropped.
    agg_df = filtered_df.groupby(['Fy', 'Region', 'Itemtype',]).agg({
        'SalesVolume': 'sum',
        'UnitPrice': 'mean'
    }).reset_index()
    # Sort so pct_change compares consecutive fiscal years within each
    # (Region, Itemtype) group.
    agg_df = agg_df.sort_values(by=['Region', 'Itemtype', 'Fy',])
    # Calculate YOY percentage changes in Sales Volume and Unit Price.
    # First year of each group is NaN by construction.
    agg_df['SalesVolume_pct_change'] = agg_df.groupby(['Region', 'Itemtype',])['SalesVolume'].pct_change().round(3) * 100
    agg_df['UnitPrice_pct_change'] = agg_df.groupby(['Region', 'Itemtype', ])['UnitPrice'].pct_change().round(3) * 100
    # Price Elasticity Coefficient: volume change relative to price change.
    agg_df['PE_Coeff'] = (agg_df['SalesVolume_pct_change'] / agg_df['UnitPrice_pct_change']).round(2)
    # Exclude FY 2025 but keep FY 2021 even with NaN values
    agg_df_filtered = agg_df[agg_df['Fy'] != 'FY 2025']
    # Drop rows where PE_Coeff is NaN (removes each group's first year)
    agg_df_filtered = agg_df_filtered.dropna(subset=['PE_Coeff'])
    st.dataframe(agg_df_filtered)
    st.write(agg_df_filtered.shape)
    # Plot the PE Coefficient with Plotly; regions are distinguished by
    # line dash style.
    fig = px.line(
        agg_df_filtered,
        x='Fy',
        y='PE_Coeff',
        line_dash='Region',  # Differentiate lines by Region
        title=f"Price Elasticity Coefficient (PE) by Year for {selected_item_type}",
        labels={'Fy': 'Fiscal Year', 'PE_Coeff': 'Price Elasticity Coefficient'},
        markers=True
    )
    # Customize layout and show plot
    fig.update_layout(
        height=600,
        width=1000,
    )
    st.plotly_chart(fig, use_container_width=True)
###################################################################################################################