File size: 8,147 Bytes
5fbe234
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
import pandas as pd 
import numpy as np 
import matplotlib.pyplot as plt 
import seaborn as sns 
import plotly.express as px 
import plotly.graph_objects as go 
from plotly.subplots import make_subplots 
import warnings
import streamlit as st 
warnings.filterwarnings('ignore') 
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from xgboost import XGBClassifier
from sklearn import metrics
from sklearn.metrics import roc_curve
from sklearn.metrics import recall_score, confusion_matrix, precision_score, f1_score, accuracy_score, classification_report
import func as fc 
from io import StringIO
# ---- Page setup -------------------------------------------------------------
st.set_page_config(layout='wide')
tab1, tab2 = st.tabs(['Data','ML'])

# Choices offered by the two select boxes; both lists are defined in func.py.
optionList = fc.OPTION_LIST
modelList = fc.MODEL_SELECTOR

# ---- Data tab: plot selector and dataset upload -----------------------------
with tab1:
    option = st.selectbox('Select the plot you want to visualize',optionList)
    uploaded_dataframe = st.file_uploader("Choose a file")

# Render only once a file has been uploaded and a plot option is selected.
if uploaded_dataframe is not None and option is not None:
    # fc.take_input must hand back exactly thirteen figures followed by the
    # processed dataframe; the explicit unpack enforces that arity.
    (
        fig1, fig2, fig3,
        fig4, fig5, fig6,
        fig7, fig8, fig9,
        fig10, fig11, fig12, fig13,
        processed_df,
    ) = fc.take_input(uploaded_dataframe)

    # Layout of the chart grid: three rows of three charts, then one row of four.
    chart_rows = (
        (fig1, fig2, fig3),
        (fig4, fig5, fig6),
        (fig7, fig8, fig9),
        (fig10, fig11, fig12, fig13),
    )

    with tab1:
        st.dataframe(processed_df)
        for row_figures in chart_rows:
            with st.container():
                # One equally sized column per chart in this row.
                for column, figure in zip(st.columns(len(row_figures)), row_figures):
                    with column:
                        st.plotly_chart(figure, use_container_width=True)

    # NOTE: an earlier revision also drew a figure inside the ML tab; that
    # rendering was removed, so nothing is added to tab2 from here.
# ---- ML tab -----------------------------------------------------------------
# Lets the user pick a model, upload a dataset, train/evaluate it through
# func.py, and then request a churn prediction for one hand-entered customer.
with tab2:
    modeloption = st.selectbox('Select an ML Model',modelList)
    uploaded_dataframe = st.file_uploader("Choose a file", key=2)
    # The slider previously spanned 0.0-1.0 and therefore started at 0.0.
    # A float test size must lie strictly between 0 and 1 for scikit-learn's
    # train_test_split, so the very first run after an upload would fail if
    # func.py forwards this value to it (presumably it does - verify there).
    # Bound the range to valid fractions and start from a conventional 20%.
    test_size_slider = st.slider(
        'Enter the test size: ',
        min_value=0.01,
        max_value=0.99,
        value=0.2,
    )
    random_state_input = st.number_input('Select a random seed',0,1000)

    if uploaded_dataframe is not None:
        # Train and evaluate the chosen model on the uploaded data.
        acc_score, classification_rep, output_df, original_df = fc.standardize_dataframe(
            uploaded_dataframe,
            modeloption,
            test_size_slider,
            random_state_input,
        )
        st.dataframe(output_df)
        st.metric(label='Accuracy Score of '+modeloption,value=str(acc_score))
        # Fenced block keeps the report's column alignment in a monospace font.
        st.markdown('```bash \t \n'+classification_rep+'```')

        st.write('Enter some information to predict the churn:')

        # Answer sets shared by several questions below.
        yes_no = ['Yes','No']
        internet_addon = ['Yes','No','No internet service']

        # Collected answers, keyed by feature-vector column name. Insertion
        # order is both the on-screen widget order and the column order of the
        # frame passed to func.py, so do not reorder these lines casually.
        # customerID is a fixed placeholder value.
        answers = {'customerID': 1}
        answers['gender'] = st.selectbox('Select the gender:',['Female','Male'])
        # NOTE(review): this sends 'Yes'/'No'; confirm func.py expects that
        # encoding for SeniorCitizen rather than a numeric flag.
        answers['SeniorCitizen'] = st.selectbox('Is the customer a senior citizen?',yes_no)
        answers['Partner'] = st.selectbox('Does the customer have a partner?',yes_no)
        answers['Dependents'] = st.selectbox('Does the customer have dependents?',yes_no)
        answers['tenure'] = st.number_input('What is the customer tenure?',0,100)
        answers['PhoneService'] = st.selectbox('Does the customer have phone service?',yes_no)
        answers['MultipleLines'] = st.selectbox(
            'Does the customer have multiple lines?',
            ['Yes','No','No phone service'],
        )
        answers['InternetService'] = st.selectbox(
            'Does the customer have internet service?',
            ['No','DSL','Fiber optic'],
        )
        answers['OnlineSecurity'] = st.selectbox('Does the customer have online security?',internet_addon)
        answers['OnlineBackup'] = st.selectbox('Does the customer have online backup?',internet_addon)
        answers['DeviceProtection'] = st.selectbox('Does the customer have device protection?',internet_addon)
        answers['TechSupport'] = st.selectbox('Does the customer have tech support?',internet_addon)
        answers['StreamingTV'] = st.selectbox('Does the customer have streaming TV?',internet_addon)
        answers['StreamingMovies'] = st.selectbox('Does the customer have streaming movies?',internet_addon)
        answers['Contract'] = st.selectbox(
            'Does the customer have a contract?',
            ['Month-to-month','One year','Two year'],
        )
        answers['PaperlessBilling'] = st.selectbox('Does the customer have paperless billing?',yes_no)
        answers['PaymentMethod'] = st.selectbox(
            'What is the payment method of the customer?',
            ['Electronic check','Mailed check','Bank transfer (automatic)','Credit card (automatic)'],
        )
        # Charges cannot be negative; bound them like tenure and the seed.
        answers['MonthlyCharges'] = st.number_input(
            'What are the monthly charges of the customer?',
            min_value=0.0,
        )
        answers['TotalCharges'] = st.number_input(
            'What are the total charges of the customer?',
            min_value=0.0,
        )

        if st.button('Predict Churn'):
            # One-row frame holding the answers. Per the original author's
            # note, func.py feeds this to a voting classifier (not visible
            # from this file).
            feature_vector = pd.DataFrame(
                {column: [value] for column, value in answers.items()}
            )
            response = fc.standardize_feature_vector(
                feature_vector,
                original_df,
                test_size_slider,
                random_state_input,
            )
            st.metric(label='Prediction Response',value=response)