Spaces:
Sleeping
Sleeping
File size: 8,147 Bytes
5fbe234 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 |
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import warnings
import streamlit as st
warnings.filterwarnings('ignore')
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from xgboost import XGBClassifier
from sklearn import metrics
from sklearn.metrics import roc_curve
from sklearn.metrics import recall_score, confusion_matrix, precision_score, f1_score, accuracy_score, classification_report
import func as fc
from io import StringIO
# Page-level configuration; this is issued before any other Streamlit call.
st.set_page_config(layout="wide")

# Two top-level tabs: one for data exploration, one for model training.
tab1, tab2 = st.tabs(["Data", "ML"])

# Choices for the two select boxes are defined in the helper module func.py.
optionList, modelList = fc.OPTION_LIST, fc.MODEL_SELECTOR
# "Data" tab: upload a dataset, show the processed frame and the chart grid.
with tab1:
    option = st.selectbox('Select the plot you want to visualize', optionList)
    uploaded_dataframe = st.file_uploader("Choose a file")

    # Render nothing until a file has been uploaded and a plot option exists.
    # NOTE(review): `option` is only checked for None here; every chart is
    # drawn regardless of which entry is selected.
    if uploaded_dataframe is not None and option is not None:
        # The helper's result is unpacked as thirteen figures followed by the
        # processed dataframe.
        (
            fig1, fig2, fig3, fig4, fig5, fig6, fig7,
            fig8, fig9, fig10, fig11, fig12, fig13,
            processed_df,
        ) = fc.take_input(uploaded_dataframe)

        st.dataframe(processed_df)

        # Chart grid: three rows of three charts, then a final row of four.
        chart_rows = (
            (fig1, fig2, fig3),
            (fig4, fig5, fig6),
            (fig7, fig8, fig9),
            (fig10, fig11, fig12, fig13),
        )
        for row_figures in chart_rows:
            with st.container():
                # One equally sized column per chart in this row.
                row_columns = st.columns(len(row_figures))
                for column, figure in zip(row_columns, row_figures):
                    with column:
                        st.plotly_chart(figure, use_container_width=True)
#removing the secondary tab
#with tab2:
# st.plotly_chart(figure,use_container_width=True)
# "ML" tab: train the selected model on an uploaded dataset, report its
# metrics, then collect one customer's attributes and predict churn for them.
with tab2:
    modeloption = st.selectbox('Select an ML Model', modelList)
    uploaded_dataframe = st.file_uploader("Choose a file", key=2)

    # A slider starts at its minimum when no value is given, so the previous
    # 0.0-1.0 range began at a test size of 0.0 and also allowed 1.0.
    # Presumably fc.standardize_dataframe forwards this to scikit-learn's
    # train_test_split (func.py is not visible here -- confirm), which rejects
    # both endpoints for float test sizes. Keep the value strictly inside
    # (0, 1) and start from a conventional 20% hold-out.
    test_size_slider = st.slider(
        'Enter the test size: ',
        min_value=0.01,
        max_value=0.99,
        value=0.2,
        step=0.01,
    )
    random_state_input = st.number_input('Select a random seed', 0, 1000)

    if uploaded_dataframe is not None:
        # Train and evaluate; original_df is kept for the single-row
        # prediction further down.
        acc_score, classification_rep, output_df, original_df = fc.standardize_dataframe(
            uploaded_dataframe,
            modeloption,
            test_size_slider,
            random_state_input,
        )
        st.dataframe(output_df)
        st.metric(label=f'Accuracy Score of {modeloption}', value=str(acc_score))
        # Fenced block so the report is shown in a monospaced font.
        st.markdown('```bash \t \n' + classification_rep + '```')

        st.write('Enter some information to predict the churn:')

        # Answer sets shared by several of the questions below.
        yes_no = ['Yes', 'No']
        internet_addon = ['Yes', 'No', 'No internet service']

        gender = st.selectbox('Select the gender:', ['Female', 'Male'])
        senior_citizen = st.selectbox('Is the customer a senior citizen?', yes_no)
        partner = st.selectbox('Does the customer have a partner?', yes_no)
        dependents = st.selectbox('Does the customer have dependents?', yes_no)
        tenure = st.number_input('What is the customer tenure?', 0, 100)
        phone_service = st.selectbox('Does the customer have phone service?', yes_no)
        multiple_lines = st.selectbox(
            'Does the customer have multiple lines?',
            ['Yes', 'No', 'No phone service'],
        )
        internet_service = st.selectbox(
            'Does the customer have internet service?',
            ['No', 'DSL', 'Fiber optic'],
        )
        online_security = st.selectbox('Does the customer have online security?', internet_addon)
        online_backup = st.selectbox('Does the customer have online backup?', internet_addon)
        device_protection = st.selectbox('Does the customer have device protection?', internet_addon)
        tech_support = st.selectbox('Does the customer have tech support?', internet_addon)
        streaming_tv = st.selectbox('Does the customer have streaming TV?', internet_addon)
        streaming_movies = st.selectbox('Does the customer have streaming movies?', internet_addon)
        contract = st.selectbox(
            'Does the customer have a contract?',
            ['Month-to-month', 'One year', 'Two year'],
        )
        paperless_billing = st.selectbox('Does the customer have paperless billing?', yes_no)
        payment_method = st.selectbox(
            'What is the payment method of the customer?',
            [
                'Electronic check',
                'Mailed check',
                'Bank transfer (automatic)',
                'Credit card (automatic)',
            ],
        )
        monthly_charges = st.number_input('What are the monthly charges of the customer?')
        total_charges = st.number_input('What are the total charges of the customer?')

        if st.button('Predict Churn'):
            # Single-row frame holding the answers entered above.
            # NOTE(review): SeniorCitizen is sent as 'Yes'/'No' and customerID
            # as the integer 1 -- confirm fc.standardize_feature_vector expects
            # these encodings and that they match the uploaded dataset.
            feature_vector = pd.DataFrame({
                'customerID': [1],
                'gender': [gender],
                'SeniorCitizen': [senior_citizen],
                'Partner': [partner],
                'Dependents': [dependents],
                'tenure': [tenure],
                'PhoneService': [phone_service],
                'MultipleLines': [multiple_lines],
                'InternetService': [internet_service],
                'OnlineSecurity': [online_security],
                'OnlineBackup': [online_backup],
                'DeviceProtection': [device_protection],
                'TechSupport': [tech_support],
                'StreamingTV': [streaming_tv],
                'StreamingMovies': [streaming_movies],
                'Contract': [contract],
                'PaperlessBilling': [paperless_billing],
                'PaymentMethod': [payment_method],
                'MonthlyCharges': [monthly_charges],
                'TotalCharges': [total_charges],
            })
            # Hand the row, the original data and the split settings to the
            # helper, then display whatever it returns.
            response = fc.standardize_feature_vector(
                feature_vector,
                original_df,
                test_size_slider,
                random_state_input,
            )
            st.metric(label='Prediction Response', value=response)
|