Spaces:

ahishamm
/

Updated_BI_Project

Sleeping

File size: 8,147 Bytes

5fbe234

import pandas as pd 
import numpy as np 
import matplotlib.pyplot as plt 
import seaborn as sns 
import plotly.express as px 
import plotly.graph_objects as go 
from plotly.subplots import make_subplots 
import warnings
import streamlit as st 
warnings.filterwarnings('ignore') 
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from xgboost import XGBClassifier
from sklearn import metrics
from sklearn.metrics import roc_curve
from sklearn.metrics import recall_score, confusion_matrix, precision_score, f1_score, accuracy_score, classification_report
import func as fc 
from io import StringIO
# Page-level setup: render the dashboard across the full browser width.
st.set_page_config(layout="wide")

# The selectable plot names and model names live in the helper module func.py.
optionList = fc.OPTION_LIST
modelList = fc.MODEL_SELECTOR

# Two top-level tabs: one for data exploration, one for machine learning.
tab1, tab2 = st.tabs(["Data", "ML"])

# Data tab inputs: a plot selector and the file used to build the charts.
with tab1:
    option = st.selectbox(
        label="Select the plot you want to visualize",
        options=optionList,
    )
    uploaded_dataframe = st.file_uploader(label="Choose a file")
# Draw the Data tab only once a file has been uploaded and a plot option exists.
if uploaded_dataframe is not None and option is not None:
    # fc.take_input yields thirteen chart objects followed by one dataframe.
    (
        fig1, fig2, fig3, fig4, fig5, fig6, fig7,
        fig8, fig9, fig10, fig11, fig12, fig13,
        processed_df,
    ) = fc.take_input(uploaded_dataframe)

    # Each inner tuple is one row of the grid; its length is the column count.
    chart_rows = (
        (fig1, fig2, fig3),
        (fig4, fig5, fig6),
        (fig7, fig8, fig9),
        (fig10, fig11, fig12, fig13),
    )

    with tab1:
        # Table first, then the chart grid underneath it.
        st.dataframe(processed_df)
        for row_figures in chart_rows:
            with st.container():
                row_columns = st.columns(len(row_figures))
                for column, figure in zip(row_columns, row_figures):
                    with column:
                        st.plotly_chart(figure, use_container_width=True)
                       

        #removing the secondary tab 
        #with tab2: 
        #    st.plotly_chart(figure,use_container_width=True)  
# ML tab: train the selected model on an uploaded file, report its scores,
# then collect one customer's details and show a churn prediction for them.
with tab2:
    modeloption = st.selectbox('Select an ML Model', modelList)
    uploaded_dataframe = st.file_uploader("Choose a file", key=2)
    # The slider previously ran from 0.0 to 1.0 with no explicit value, so it
    # started at its minimum, 0.0. Assuming func.py forwards this value as the
    # test fraction of a train/test split (confirm there), both endpoints are
    # unusable: one side of the split would be empty. Keep the range strictly
    # inside (0, 1) and start from a conventional 20% hold-out.
    test_size_slider = st.slider(
        'Enter the test size: ',
        min_value=0.05,
        max_value=0.95,
        value=0.2,
    )
    random_state_input = st.number_input('Select a random seed', 0, 1000)
    if uploaded_dataframe is not None:
        # Train/evaluate with the chosen model and split settings. original_df
        # is kept so the prediction step below can be given the same data.
        acc_score, classification_rep, output_df, original_df = fc.standardize_dataframe(
            uploaded_dataframe,
            modeloption,
            test_size_slider,
            random_state_input,
        )
        st.dataframe(output_df)
        st.metric(label='Accuracy Score of '+modeloption, value=str(acc_score))
        # Render the report inside a fenced block so its column alignment survives.
        st.markdown('```bash \t \n'+classification_rep+'```')

        # Prediction form: one widget per input column of the feature vector.
        st.write('Enter some information to predict the churn:')
        pr_1 = st.selectbox('Select the gender:', ['Female', 'Male'])
        # NOTE(review): SeniorCitizen is submitted as 'Yes'/'No' text; confirm
        # this matches how that column is encoded in the uploaded training data.
        pr_2 = st.selectbox('Is the customer a senior citizen?', ['Yes', 'No'])
        pr_3 = st.selectbox('Does the customer have a partner?', ['Yes', 'No'])
        pr_4 = st.selectbox('Does the customer have dependents?', ['Yes', 'No'])
        pr_5 = st.number_input('What is the customer tenure?', 0, 100)
        pr_6 = st.selectbox('Does the customer have phone service?', ['Yes', 'No'])
        pr_7 = st.selectbox('Does the customer have multiple lines?', ['Yes', 'No', 'No phone service'])
        pr_8 = st.selectbox('Does the customer have internet service?', ['No', 'DSL', 'Fiber optic'])
        pr_9 = st.selectbox('Does the customer have online security?', ['Yes', 'No', 'No internet service'])
        pr_10 = st.selectbox('Does the customer have online backup?', ['Yes', 'No', 'No internet service'])
        pr_11 = st.selectbox('Does the customer have device protection?', ['Yes', 'No', 'No internet service'])
        pr_12 = st.selectbox('Does the customer have tech support?', ['Yes', 'No', 'No internet service'])
        pr_13 = st.selectbox('Does the customer have streaming TV?', ['Yes', 'No', 'No internet service'])
        pr_14 = st.selectbox('Does the customer have streaming movies?', ['Yes', 'No', 'No internet service'])
        pr_15 = st.selectbox('Does the customer have a contract?', ['Month-to-month', 'One year', 'Two year'])
        pr_16 = st.selectbox('Does the customer have paperless billing?', ['Yes', 'No'])
        pr_17 = st.selectbox(
            'What is the payment method of the customer?',
            ['Electronic check', 'Mailed check', 'Bank transfer (automatic)', 'Credit card (automatic)'],
        )
        pr_18 = st.number_input('What are the monthly charges of the customer?')
        pr_19 = st.number_input('What are the total charges of the customer?')

        if st.button('Predict Churn'):
            # Single-row dataframe built from the form answers.
            # customerID is presumably a placeholder id; verify how func.py
            # treats that column.
            feature_vector = pd.DataFrame({
                'customerID': [1],
                'gender': [pr_1],
                'SeniorCitizen': [pr_2],
                'Partner': [pr_3],
                'Dependents': [pr_4],
                'tenure': [pr_5],
                'PhoneService': [pr_6],
                'MultipleLines': [pr_7],
                'InternetService': [pr_8],
                'OnlineSecurity': [pr_9],
                'OnlineBackup': [pr_10],
                'DeviceProtection': [pr_11],
                'TechSupport': [pr_12],
                'StreamingTV': [pr_13],
                'StreamingMovies': [pr_14],
                'Contract': [pr_15],
                'PaperlessBilling': [pr_16],
                'PaymentMethod': [pr_17],
                'MonthlyCharges': [pr_18],
                'TotalCharges': [pr_19],
            })
            # Hand the row, the original data and the split settings to the
            # helper module, then display whatever response it returns.
            response = fc.standardize_feature_vector(
                feature_vector,
                original_df,
                test_size_slider,
                random_state_input,
            )
            st.metric(label='Prediction Response', value=response)