File size: 6,220 Bytes
cd2c21d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
import pandas as pd
import streamlit as st
import numpy as np
import pickle
# streamlit import sklearn
from PIL import Image
import os
import xgboost as xgb

# model = xgb.XGBRegressor()
# model = model.load_model('export/xg_model.json')

# Load the saved preprocessing components and the trained model.
# The path is assembled with os.path.join rather than a backslash literal:
# the previous ".\export\dt_model.pkl" only resolved on Windows and relied on
# "\e" and "\d" being passed through as unrecognised escape sequences.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file,
# so this artifact must only ever come from a trusted source.
MODEL_PATH = os.path.join("export", "dt_model.pkl")
with open(MODEL_PATH, "rb") as f:
    components = pickle.load(f)

# Pull each saved object out of the loaded dictionary by its key.
num_imputer = components["num_imputer"]
cat_imputer = components["cat_imputer"]
encoder = components["encoder"]
scaler = components["scaler"]
# The predictor is stored under the "models" key.
dt_model = components["models"]

# Set up the page. This is the first Streamlit call in the script and
# selects the wide layout.
st.set_page_config(layout="wide")

# Optional banner image, currently disabled:
# image = Image.open('copofav.jpg')
# st.image(image)

# Page heading, followed by a one-line instruction for the user.
st.title("SALES PREDICTION APP")
st.write("Please ENTER the relevant data and CLICK Predict.")

# Gather every input value from the user, laid out across three columns.
# The selectable values are named up front so the widget calls stay short.
family_options = [
    'AUTOMOTIVE', 'BABY CARE', 'BEAUTY', 'BEVERAGES', 'BOOKS',
    'BREAD/BAKERY', 'CELEBRATION', 'CLEANING', 'DAIRY', 'DELI', 'EGGS',
    'FROZEN FOODS', 'GROCERY I', 'GROCERY II', 'HARDWARE',
    'HOME AND KITCHEN I', 'HOME AND KITCHEN II', 'HOME APPLIANCES',
    'HOME CARE', 'LADIESWEAR', 'LAWN AND GARDEN', 'LINGERIE',
    'LIQUOR,WINE,BEER', 'MAGAZINES', 'MEATS', 'PERSONAL CARE',
    'PET SUPPLIES', 'PLAYERS AND ELECTRONICS', 'POULTRY',
    'PREPARED FOODS', 'PRODUCE', 'SCHOOL AND OFFICE SUPPLIES',
    'SEAFOOD',
]
state_options = [
    'Santa Elena', 'Pichincha', 'Cotopaxi', 'Chimborazo', 'Imbabura',
    'Santo Domingo de los Tsachilas', 'Bolivar', 'Tungurahua',
    'Guayas', 'Los Rios', 'Azuay', 'Loja', 'El Oro', 'Esmeraldas',
    'Manabi', 'Pastaza',
]
store_type_options = ['A', 'B', 'C', 'D', 'E']

# Values are stored under the column names the prediction step selects.
input_data = {}
col1, col2, col3 = st.columns(3)

# First column: store number, product family and promotion figure.
with col1:
    input_data['store_nbr'] = st.slider(
        "Store Number", min_value=0, max_value=54, step=1)
    input_data['family'] = st.selectbox("Products Family", family_options)
    input_data['onpromotion'] = st.number_input(
        "Discount Amt On Promotion", step=1)

# Second column: state, store type and cluster.
with col2:
    input_data['state'] = st.selectbox("State", state_options)
    input_data['store_type'] = st.radio(
        "Store Type", options=store_type_options, horizontal=True)
    input_data['cluster'] = st.number_input("Cluster", step=1)

# Third column: month, day and the dcoilwtico value.
with col3:
    input_data['month'] = st.slider("Month", min_value=1, max_value=12)
    input_data['day'] = st.slider("Day", min_value=1, max_value=31)
    input_data['dcoilwtico'] = st.slider(
        "DCOILWTICO", min_value=29, max_value=108, step=1)

  # Create a button to make a prediction
if st.button("Predict"):
    # Build a one-row DataFrame from the values entered in the widgets.
    input_df = pd.DataFrame([input_data])

    # NOTE(review): an earlier experiment that grouped product families into
    # coarser categories was left disabled here, so 'family' is passed on
    # unchanged. Confirm this matches how the model was trained.

    categorical_columns = ['family', 'state', 'store_type']
    numerical_columns = ['store_nbr', 'onpromotion',
                         'cluster', 'dcoilwtico', 'month', 'day']

    # The imputers, encoder and scaler are expected to arrive already fitted
    # from the pickle, so they are applied with transform() only. Re-fitting
    # them on this single row would discard what they learned: the encoder
    # would know just one category per column and the scaler would lose its
    # training statistics, so the model would not see the features it expects.

    # Impute missing values
    input_df_cat_imputed = cat_imputer.transform(
        input_df[categorical_columns])
    input_df_num_imputed = num_imputer.transform(
        input_df[numerical_columns])

    # Encode categorical features; densify only when the encoder returns a
    # sparse matrix so a dense-output encoder works as well.
    encoded = encoder.transform(input_df_cat_imputed)
    if hasattr(encoded, "toarray"):
        encoded = encoded.toarray()
    input_df_cat_encoded = pd.DataFrame(
        encoded,
        columns=encoder.get_feature_names_out(categorical_columns))

    # Scale numerical features
    input_df_num_scaled = scaler.transform(input_df_num_imputed)
    input_df_num_sc = pd.DataFrame(
        input_df_num_scaled, columns=numerical_columns)

    # Combine scaled numerical features (first) with the encoded categorical
    # features (second), side by side in a single row.
    input_df_processed = pd.concat(
        [input_df_num_sc, input_df_cat_encoded], axis=1)

    # Make predictions using the trained model
    predictions = dt_model.predict(input_df_processed)

    # Display the predicted sales value to the user:
    st.write("Predicted Sales:", predictions[0])