"""Streamlit app with a 'Data' tab of Plotly charts and an 'ML' tab for churn models.

All data processing, plotting and modelling is delegated to the project-local
``func`` module (imported as ``fc``); this script only builds the user
interface around it.
"""
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import warnings
import streamlit as st
warnings.filterwarnings('ignore')
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from xgboost import XGBClassifier
from sklearn import metrics
from sklearn.metrics import roc_curve
from sklearn.metrics import recall_score, confusion_matrix, precision_score, f1_score, accuracy_score, classification_report
import func as fc
from io import StringIO

# Number of charts placed side by side in each row of the 'Data' tab.
CHART_ROW_SIZES = (3, 3, 3, 4)

# Value pre-selected on the test-size slider. Without an explicit value the
# slider would start at its minimum (0.0).
DEFAULT_TEST_SIZE = 0.2

_YES_NO = ['Yes', 'No']
_YES_NO_INTERNET = ['Yes', 'No', 'No internet service']

# One entry per prediction input, in display order:
# (feature-vector column, widget label, selectbox options or None,
#  positional min/max bounds for number_input).
# NOTE(review): 'SeniorCitizen' is submitted as 'Yes'/'No'; confirm that
# fc.standardize_feature_vector expects that encoding.
CUSTOMER_FIELDS = (
    ('gender', 'Select the gender:', ['Female', 'Male'], ()),
    ('SeniorCitizen', 'Is the customer a senior citizen?', _YES_NO, ()),
    ('Partner', 'Does the customer have a partner?', _YES_NO, ()),
    ('Dependents', 'Does the customer have dependents?', _YES_NO, ()),
    ('tenure', 'What is the customer tenure?', None, (0, 100)),
    ('PhoneService', 'Does the customer have phone service?', _YES_NO, ()),
    ('MultipleLines', 'Does the customer have multiple lines?',
     ['Yes', 'No', 'No phone service'], ()),
    ('InternetService', 'Does the customer have internet service?',
     ['No', 'DSL', 'Fiber optic'], ()),
    ('OnlineSecurity', 'Does the customer have online security?', _YES_NO_INTERNET, ()),
    ('OnlineBackup', 'Does the customer have online backup?', _YES_NO_INTERNET, ()),
    ('DeviceProtection', 'Does the customer have device protection?', _YES_NO_INTERNET, ()),
    ('TechSupport', 'Does the customer have tech support?', _YES_NO_INTERNET, ()),
    ('StreamingTV', 'Does the customer have streaming TV?', _YES_NO_INTERNET, ()),
    ('StreamingMovies', 'Does the customer have streaming movies?', _YES_NO_INTERNET, ()),
    ('Contract', 'Does the customer have a contract?',
     ['Month-to-month', 'One year', 'Two year'], ()),
    ('PaperlessBilling', 'Does the customer have paperless billing?', _YES_NO, ()),
    ('PaymentMethod', 'What is the payment method of the customer?',
     ['Electronic check', 'Mailed check', 'Bank transfer (automatic)',
      'Credit card (automatic)'], ()),
    ('MonthlyCharges', 'What are the monthly charges of the customer?', None, ()),
    ('TotalCharges', 'What are the total charges of the customer?', None, ()),
)


def _render_chart_rows(figures, row_sizes=CHART_ROW_SIZES):
    """Draw *figures* in consecutive rows of equal-width columns.

    ``row_sizes[i]`` columns are created for row ``i`` and filled left to
    right. Figures beyond ``sum(row_sizes)`` are not drawn; a row with no
    figures left ends the layout.
    """
    start = 0
    for size in row_sizes:
        row_figures = figures[start:start + size]
        start += size
        if not row_figures:
            break
        with st.container():
            for column, figure in zip(st.columns(size), row_figures):
                with column:
                    st.plotly_chart(figure, use_container_width=True)


def _collect_customer_features():
    """Render one widget per entry of CUSTOMER_FIELDS and return {column: value}."""
    values = {}
    for column, label, choices, bounds in CUSTOMER_FIELDS:
        if choices is not None:
            values[column] = st.selectbox(label, choices)
        else:
            values[column] = st.number_input(label, *bounds)
    return values


st.set_page_config(layout='wide')
tab1, tab2 = st.tabs(['Data', 'ML'])

# Selector contents are defined in the functions file func.py.
optionList = fc.OPTION_LIST
modelList = fc.MODEL_SELECTOR

# --- 'Data' tab: upload a file and show the processed frame plus its charts.
with tab1:
    option = st.selectbox('Select the plot you want to visualize', optionList)
    uploaded_dataframe = st.file_uploader("Choose a file")

# `option` is only checked for presence; it is not passed to fc.take_input,
# so every figure is drawn regardless of the selection.
if uploaded_dataframe is not None and option is not None:
    # fc.take_input returns the figures followed by the processed dataframe.
    *figures, processed_df = fc.take_input(uploaded_dataframe)
    with tab1:
        st.dataframe(processed_df)
        _render_chart_rows(figures)

# --- 'ML' tab: train the selected model, report its scores, then predict.
with tab2:
    modeloption = st.selectbox('Select an ML Model', modelList)
    # Explicit key keeps this uploader distinct from the one on the 'Data' tab,
    # which has the same label.
    uploaded_dataframe = st.file_uploader("Choose a file", key=2)
    test_size_slider = st.slider('Enter the test size: ', 0.0, 1.0, DEFAULT_TEST_SIZE)
    random_state_input = st.number_input('Select a random seed', 0, 1000)

    if uploaded_dataframe is not None:
        if not 0.0 < test_size_slider < 1.0:
            # Presumably fc.standardize_dataframe forwards the test size to
            # scikit-learn's train_test_split (not visible here), which rejects
            # float sizes of 0.0 and 1.0; report it instead of crashing.
            st.warning('Choose a test size strictly between 0 and 1 to train and evaluate a model.')
        else:
            acc_score, classification_rep, output_df, original_df = fc.standardize_dataframe(
                uploaded_dataframe, modeloption, test_size_slider, random_state_input)
            st.dataframe(output_df)
            st.metric(label='Accuracy Score of '+modeloption, value=str(acc_score))
            # Fenced block keeps the report's column alignment.
            st.markdown('```bash \t \n'+classification_rep+'```')

            st.write('Enter some information to predict the churn:')
            customer_values = _collect_customer_features()

            if st.button('Predict Churn'):
                # Single-row frame: a placeholder customerID followed by the
                # form values, in CUSTOMER_FIELDS order.
                feature_vector = pd.DataFrame({
                    'customerID': [1],
                    **{column: [value] for column, value in customer_values.items()},
                })
                response = fc.standardize_feature_vector(
                    feature_vector, original_df, test_size_slider, random_state_input)
                st.metric(label='Prediction Response', value=response)