"""Streamlit app that shows a house-price range for Ames, Iowa.

The sidebar exposes one integer slider per model feature.  The selected
values are assembled into a single-row DataFrame and, when the button is
pressed, passed to the two pickled LightGBM models loaded below.  The two
point predictions bracket the price range that is displayed.
"""
import pickle

import numpy as np
import pandas as pd
import streamlit as st
from sklearn.preprocessing import MinMaxScaler

# NOTE: the scaler is instantiated but never fitted or applied; the models
# receive the raw slider values.
scaler = MinMaxScaler()


def _load_model(path):
    """Unpickle a trained model from *path* and close the file afterwards."""
    # SECURITY: unpickling runs arbitrary code embedded in the file.  Only
    # load model files that were produced by a trusted training run.
    with open(path, 'rb') as model_file:
        return pickle.load(model_file)


def _predict_price(model, frame):
    """Return the model's prediction for the first row of *frame* as a float."""
    return float(np.ravel(model.predict(frame))[0])


def _format_usd(amount):
    """Format *amount* truncated to whole dollars, e.g. ``181,500.00``."""
    return "{:,.2f}".format(int(amount))


# load trained models
lgbm_base = _load_model('lgbm_base.pkl')
lgbm_opt = _load_model('lgbm_optimized.pkl')

np.random.seed(42)

# (feature name, sidebar question, slider minimum, slider maximum)
# Keeping the four values together prevents the lists from drifting out of
# alignment.  The order of the rows is the column order given to the models.
# NOTE(review): the bounds are copied unchanged from the previous parallel
# lists; some look as if they may belong to a neighbouring feature (e.g. a
# minimum of 334 for MasVnrArea) -- confirm against the training data.
FEATURES = [
    ('OverallQual',
     'What is the Overall material and finish quality?', 1, 10),
    ('YearBuilt',
     'In which year was the Original construction date?', 1950, 2010),
    ('TotalBsmtSF',
     'What is the Total square feet of basement area?', 0, 2336),
    ('GrLivArea',
     'What is the Above grade (ground) living area in square feet?', 0, 6110),
    ('MasVnrArea',
     'What is the Masonry veneer area in square feet?', 334, 4692),
    ('BsmtFinType1',
     'What is the Quality of the basement finished area?', 1, 7),
    ('Neighborhood',
     'Where are the physical locations within Ames city limits?', 1, 25),
    ('GarageType',
     'Where is the location of the Garage?', 1, 7),
    ('SaleCondition',
     'What is the condition of the sale?', 1, 6),
    ('BsmtExposure',
     'Does the house have walkout or garden-level basement walls?', 0, 5),
]

# Page header.  The text is kept exactly as it was; only the quoting changed
# so that the literals no longer span physical lines.
st.markdown("", unsafe_allow_html=True)
st.markdown(
    "\n\nHouse Price Prediction in Ames,Iowa\n\n",
    unsafe_allow_html=True,
)
st.markdown(
    "\nOptuna optimized LGBM model to estimate the range of house prices "
    "based on your selection.\n",
    unsafe_allow_html=True,
)

# One slider per feature; values are collected in a dict instead of being
# injected into module globals.
selections = {}
with st.sidebar:
    for feature_name, question, lowest, highest in FEATURES:
        selections[feature_name] = st.slider(
            question, min_value=lowest, max_value=highest, step=1
        )
    st.write("[Kaggle Link to Data Set](https://www.kaggle.com/competitions/house-prices-advanced-regression-techniques)")

# Single-row frame whose columns follow the FEATURES order.
data_df = pd.DataFrame(
    {feature_name: [value] for feature_name, value in selections.items()}
)
st.write(data_df.head())

# Five equal columns; the button goes in the middle one to centre it.
button_column = st.columns(5)[2]
with button_column:
    center_button = st.button('Calculate range of house price')

if center_button:
    # The spinner now wraps the real prediction work instead of a fixed sleep.
    with st.spinner('Calculating....'):
        estimates = [
            _predict_price(model, data_df) for model in (lgbm_base, lgbm_opt)
        ]

    st.markdown(
        "\nThe price range of your house is between:\n",
        unsafe_allow_html=True,
    )

    # The models return point estimates only, so the range is bracketed by
    # the two predictions.  This is not a statistical confidence interval.
    lower_number = _format_usd(min(estimates))
    higher_number = _format_usd(max(estimates))

    # Three equal columns; the result is shown in the middle one.
    result_column = st.columns(3)[1]
    with result_column:
        st.subheader("USD " + lower_number)
        st.subheader(" AND ")
        st.subheader(" USD " + higher_number)