# Streamlit-app / app.py
# QuophyDzifa's picture
# app.py
# cd2c21d
import pandas as pd
import streamlit as st
import numpy as np
import pickle
# streamlit import sklearn
from PIL import Image
import os
import xgboost as xgb
# model = xgb.XGBRegressor()
# model = model.load_model('export/xg_model.json')
# Load the saved preprocessing components and the trained model.
#
# The path is assembled with os.path.join instead of a hard-coded
# ".\export\dt_model.pkl": the backslash form only resolves on Windows and
# contains the invalid escape sequences "\e" and "\d", which newer Python
# versions warn about. The location is still relative to the current working
# directory, as before.
#
# NOTE(security): pickle.load can execute arbitrary code embedded in the file.
# Only load artifacts that were produced by this project.
MODEL_PATH = os.path.join("export", "dt_model.pkl")
with open(MODEL_PATH, "rb") as f:
    components = pickle.load(f)

# Extract the individual components by the keys they were saved under.
num_imputer = components["num_imputer"]
cat_imputer = components["cat_imputer"]
encoder = components["encoder"]
scaler = components["scaler"]
# The estimator is stored under the plural key "models".
dt_model = components["models"]
# ---------------------------------------------------------------------------
# Page setup
# ---------------------------------------------------------------------------
# Wide layout gives the three input columns below the full browser width.
st.set_page_config(layout="wide")

# Optional banner image (currently disabled):
# image = Image.open('copofav.jpg')
# st.image(image)

st.title("SALES PREDICTION APP")
st.write("Please ENTER the relevant data and CLICK Predict.")

# ---------------------------------------------------------------------------
# Choices offered by the categorical widgets
# ---------------------------------------------------------------------------
family_options = [
    'AUTOMOTIVE', 'BABY CARE', 'BEAUTY', 'BEVERAGES', 'BOOKS',
    'BREAD/BAKERY', 'CELEBRATION', 'CLEANING', 'DAIRY', 'DELI', 'EGGS',
    'FROZEN FOODS', 'GROCERY I', 'GROCERY II', 'HARDWARE',
    'HOME AND KITCHEN I', 'HOME AND KITCHEN II', 'HOME APPLIANCES',
    'HOME CARE', 'LADIESWEAR', 'LAWN AND GARDEN', 'LINGERIE',
    'LIQUOR,WINE,BEER', 'MAGAZINES', 'MEATS', 'PERSONAL CARE',
    'PET SUPPLIES', 'PLAYERS AND ELECTRONICS', 'POULTRY',
    'PREPARED FOODS', 'PRODUCE', 'SCHOOL AND OFFICE SUPPLIES',
    'SEAFOOD',
]
state_options = [
    'Santa Elena', 'Pichincha', 'Cotopaxi', 'Chimborazo', 'Imbabura',
    'Santo Domingo de los Tsachilas', 'Bolivar', 'Tungurahua',
    'Guayas', 'Los Rios', 'Azuay', 'Loja', 'El Oro', 'Esmeraldas',
    'Manabi', 'Pastaza',
]
store_type_options = ['A', 'B', 'C', 'D', 'E']

# ---------------------------------------------------------------------------
# Input widgets, laid out in three columns
# ---------------------------------------------------------------------------
col1, col2, col3 = st.columns(3)

# Dict values are evaluated top to bottom, so the widgets are created in the
# order listed here; each one is placed in the column it is called on.
input_data = {
    # First column: store and product details.
    'store_nbr': col1.slider(
        "Store Number", min_value=0, max_value=54, step=1),
    'family': col1.selectbox("Products Family", family_options),
    'onpromotion': col1.number_input("Discount Amt On Promotion", step=1),
    # Second column: location and store classification.
    'state': col2.selectbox("State", state_options),
    'store_type': col2.radio(
        "Store Type", options=store_type_options, horizontal=True),
    'cluster': col2.number_input("Cluster", step=1),
    # Third column: date parts and the oil-price feature.
    'month': col3.slider("Month", min_value=1, max_value=12),
    'day': col3.slider("Day", min_value=1, max_value=31),
    'dcoilwtico': col3.slider(
        "DCOILWTICO", min_value=29, max_value=108, step=1),
}
# Run the prediction pipeline when the user presses the button.
#
# The preprocessing objects loaded from the pickle are only ever *applied*
# here (transform), never re-fitted. Re-fitting them on the single
# user-supplied row would discard the statistics and categories stored in the
# loaded objects: the encoder would emit just one column per categorical
# feature and the scaler would map every numeric value to 0, so the feature
# matrix would no longer match what the model expects.
#
# The commented-out product-family grouping experiment was removed: it
# referenced a 'products' column that this app never creates.
if st.button("Predict"):
    # One-row DataFrame built from the widget values.
    input_df = pd.DataFrame([input_data])

    # NOTE(review): these lists are assumed to match the column order the
    # imputers, encoder and scaler were fitted with -- confirm against the
    # training notebook.
    categorical_columns = ['family', 'state', 'store_type']
    numerical_columns = ['store_nbr', 'onpromotion',
                         'cluster', 'dcoilwtico', 'month', 'day']

    # Impute missing values using the already-fitted imputers.
    input_df_cat_imputed = cat_imputer.transform(
        input_df[categorical_columns])
    input_df_num_imputed = num_imputer.transform(
        input_df[numerical_columns])

    # Encode categorical features with the already-fitted encoder.
    encoded = encoder.transform(input_df_cat_imputed)
    # The encoder may return a sparse matrix; densify it only if needed.
    if hasattr(encoded, "toarray"):
        encoded = encoded.toarray()
    input_df_cat_encoded = pd.DataFrame(
        encoded,
        columns=encoder.get_feature_names_out(categorical_columns))

    # Scale numerical features with the already-fitted scaler.
    input_df_num_sc = pd.DataFrame(
        scaler.transform(input_df_num_imputed), columns=numerical_columns)

    # Combine features: scaled numeric columns first, then the encoded
    # categorical columns (same order as the original code).
    input_df_processed = pd.concat(
        [input_df_num_sc, input_df_cat_encoded], axis=1)

    # Make the prediction and display the predicted sales value.
    predictions = dt_model.predict(input_df_processed)
    st.write("Predicted Sales:", predictions[0])