# NOTE(review): removed stray build-log residue ("Spaces:" / "Build error") that
# was not valid Python and prevented the module from importing.
# -*- coding: utf-8 -*- | |
#### Importing Modules #### | |
import base64 | |
import pandas as pd | |
import streamlit as st | |
from autoclean import data_clean | |
from model_pipeline_steps import get_problem_type1, model_build | |
from PIL import Image | |
from DA_P1 import get_details, imbalnce_ratio, word_cloud, plotly_target, plot_ngram | |
import pickle | |
from NLP_text_classification import model_train, predict_text, predict_csv | |
from kmeans import k_means | |
from jinja2.ext import i18n | |
info = {} | |
#********* Handling rturn variable in cache memory to solve reloading issue in streamlit ******# | |
def get_details_local(data): | |
final_output = get_details(data) | |
return final_output | |
def clean(dataset, drop_features): | |
cleaned_data, steps_dict = data_clean(dataset, drop_features) | |
return cleaned_data, steps_dict | |
def get_problem_type_local(cleaned_data, target_data): | |
p_type = get_problem_type1(cleaned_data, target_data) | |
return p_type | |
def model_build_local(cleaned_data, target_data, p_type, balance_data, steps_dict): | |
model = model_build(cleaned_data, target_data, p_type, balance_data, steps_dict) | |
return model | |
def model_train_local(dataset, input_feature, target_data, balance_data): | |
model_info = model_train(dataset, input_feature, target_data, balance_data) | |
return model_info | |
def word_cloud_local(dataset, input_col): | |
plt = word_cloud(dataset, input_col) | |
return plt | |
def plotly_target_local(dataset, tg_col): | |
plt = plotly_target(dataset, tg_col) | |
return plt | |
def plot_ngram_local(dataset, tg_col): | |
plt = plot_ngram(dataset, tg_col) | |
return plt | |
#******************************************************************# | |
# -------- shared UI helpers (extracted from code duplicated between the ---------- #
# -------- Supervised and UnSupervised branches of the original main()) ---------- #

# Inline CSS reused by every problem-type badge; pieces concatenate to the
# exact style string the original emitted (including the 'algin' typo, kept
# so the rendered HTML is byte-identical).
_BADGE_STYLE = ("border-radius: 12px;algin:center;background-color:#04AA6D;"
                "border: none;color: white;padding: 20px;text-align: center;"
                "text-decoration: none;display: inline-block;font-size: 16px;"
                "margin: 4px 2px;")


def _problem_badge(text):
    """Return the green badge-button HTML used to announce the detected problem type."""
    return "<button style='" + _BADGE_STYLE + "'>" + text + "</button>"


def _csv_download_link(df):
    """Return an <a download> link embedding *df* as a base64 CSV payload."""
    csv = df.to_csv(index=False)
    b64 = base64.b64encode(csv.encode()).decode()
    return f'<a href="data:file/csv;base64,{b64}" download="download.csv">Download Predicted file</a>'


def _model_download_link(model):
    """Return an <a download> link embedding the pickled *model* as base64."""
    b64 = base64.b64encode(pickle.dumps(model)).decode()
    return f'<a href="data:file/output_model;base64,{b64}" download="Best_model.pkl">Download Best Model .pkl File</a> '


def _render_variable_info(container, title, label, variable_info, limits):
    """Render per-column statistics spread across four columns.

    *limits* is a 4-tuple of cumulative item counts (one per column); items past
    the last limit are assumed to be plotly figures (matches the dict layout
    produced by get_details).
    """
    con = container.beta_container()
    con.subheader(title)
    con.text(" ")
    for col_name, stats in variable_info.items():
        con.text(" ")
        con.write(label + str(col_name))
        cells = con.beta_columns(4)
        for i, (stat_key, stat_val) in enumerate(stats.items(), start=1):
            line = str(stat_key) + ": " + str(stat_val)
            if i <= limits[0]:
                cells[0].text(line)
            elif i <= limits[1]:
                cells[1].text(line)
            elif i <= limits[2]:
                cells[2].text(line)
            elif i <= limits[3]:
                cells[3].text(line)
            else:
                con.plotly_chart(stat_val, config={'displaylogo': False})


def _render_data_analysis(final_output, first):
    """Render the 'Analyze Data' expander (head rows, overview, variable info,
    scatter matrix, correlations, missing values). Shared by both experiment types."""
    if not st.button("Click here to Analyze Data"):
        return
    container = st.beta_expander("Data Analysis and visualization Details")
    container.subheader("First 10 Rows")
    container.write(first)
    container.text(" ")
    overview_con = container.beta_container()
    overview_con.subheader("Overview of Dataset")
    overview_con.text(" ")
    ov_c1, ov_c2, ov_c3 = overview_con.beta_columns(3)
    for col, heading, details in (
            (ov_c1, "Statistics", final_output['overview']['data_statistics']),
            (ov_c2, "Variable Info", final_output['overview']['variable_type']),
            (ov_c3, "Reproduction", final_output['reproduction'])):
        col.write(heading)
        for key, value in details.items():
            col.text(str(key) + ": " + str(value))
    container.text(" ")
    _render_variable_info(container, "Numeric Variable Information", "Numeric Column:",
                          final_output['numerical_variable_info']['variable_info'],
                          (7, 14, 21, 24))
    container.text(" ")
    _render_variable_info(container, "Categorical Variable Information", "Categorical Column:",
                          final_output['categorical_variable_info']['variable_info'],
                          (5, 10, 15, 16))
    container.text(" ")
    container.text("Scatter chart Matrix")
    container.plotly_chart(final_output['scatter_chart_matrix'], config={'displaylogo': False})
    container.text(" ")
    container.text(" ")
    corr_con = container.beta_container()
    corr_con.subheader("Correlation Matrix Information")
    corr_con.text(" ")
    for key1, value1 in final_output['correlation_matrix_info'].items():
        corr_con.text(" ")
        corr_con.write(key1)
        corr_con.plotly_chart(value1, config={'displaylogo': False})
    container.text(" ")
    missing_con = container.beta_container()
    missing_con.subheader("Missing Values Information")
    missing_con.text(" ")
    mis_c1, mis_c2 = missing_con.beta_columns(2)
    mis_c3, mis_c4 = missing_con.beta_columns(2)
    mis_cells = (mis_c1, mis_c2, mis_c3, mis_c4)
    # Bug fix vs. original: it wrote a spacer into corr_con (the correlation
    # container) on every iteration of this loop; dropped. Plots past the
    # fourth fall back to the first cell, as before.
    for k, (key, value) in enumerate(final_output['missing_values_info'].items()):
        cell = mis_cells[k] if k < len(mis_cells) else mis_c1
        cell.write(key)
        cell.pyplot(value)


def _show_model_results():
    """Display the artifacts stored in the module-level *info* dict after a build."""
    for columns in info['auto_drop']:
        st.write("automatically dropped column: " + columns)
    st.text(" ")
    st.subheader("After Cleaning data")
    st.write(info['clean_data'])
    st.write(info['problem'])
    st.write(info['target_statement'])
    for key, val in info['model'].items():
        st.text(" ")
        if key in ("Best pipeline", "step_dict"):
            pass  # internal artifacts, not user-facing
        elif key in ("ROC Curve", "model_comparison", "Regression graph"):
            st.write(key)
            st.plotly_chart(val, config={'displaylogo': False})
        elif key == "Classification Report":
            st.write(key)
            st.text(val)
        elif key == "Handling Imbalanced Dataset":
            st.write(key)
            for key1, val1 in val.items():
                st.write(key1)
                st.text(val1)
        else:
            st.write(key)
            st.write(val)
    st.text(" ")
    st.text(" ")


def _prediction_section():
    """Upload a CSV, re-apply the stored cleaning steps, predict with the best
    pipeline and offer CSV/model downloads. Reads the module-level *info* dict."""
    st.subheader("Upload csv file for Predictions : ")
    file_upload1 = st.file_uploader(" ", type=["csv"])
    if file_upload1 is None:
        return
    try:
        test_data = pd.read_csv(file_upload1)
        data = test_data.copy()
        data.drop(info['step_dict']['dropped_features'], axis=1, inplace=True)
        for col in data.columns:
            data[col].fillna(info['step_dict']['missing_values'][col], inplace=True)
        # Re-apply the training-time categorical encodings to input features.
        for mapping in info['step_dict']['categorical_to_numeric']:
            for key, value in mapping.items():
                col_name = key.split('_encoded')[0]
                if col_name != info['target']:
                    data[col_name].replace(value, inplace=True)
        if info['target'] in data.columns:
            data.drop([info['target']], axis=1, inplace=True)
        final_model = info['model']['Best pipeline']
        predictions = final_model.predict(data)
        predict_column_name = info['target'] + "_prediction"
        test_data[predict_column_name] = predictions
        # Decode numeric predictions back to the original target labels.
        for mapping in info['step_dict']['categorical_to_numeric']:
            for key, value in mapping.items():
                col_name = key.split('_encoded')[0]
                if col_name == info['target']:
                    decoded = {v: k for k, v in value.items()}
                    test_data[predict_column_name].replace(decoded, inplace=True)
        st.markdown(_csv_download_link(test_data), unsafe_allow_html=True)
        st.markdown(_model_download_link(final_model), unsafe_allow_html=True)
    except Exception as e:
        st.text(e)
        st.error("Uploaded wrong data for prediction")


def _predictive_section(dataset, drop_features, target_data):
    """Predictive-analytics workflow: EDA, imbalance option, clean + build, results, prediction."""
    final_output = get_details_local(dataset)
    st.text(" ")
    first = dataset.head(10)
    _render_data_analysis(final_output, first)
    # Optional handling of an imbalanced target.
    st.text(" ")
    ir_res = imbalnce_ratio(dataset, target_data)
    st.subheader("Select below option to Handle Imbalanced Dataset (optional)")
    st.text("Imbalance Ratio (" + ir_res + ")")
    balance_data = st.selectbox(label=' ', options=["Auto", "False"])
    # Data cleaning and model building.
    st.text(" ")
    if st.checkbox('Start build model') and target_data not in drop_features:
        st.text(" ")
        cleaned_data, steps_dict = clean(dataset, drop_features)
        info['clean_data'] = cleaned_data.head()
        info['auto_drop'] = steps_dict['auto_drop']
        p_type = get_problem_type_local(cleaned_data, target_data)
        info['problem'] = "Problem type :" + p_type
        info['target_statement'] = "Target column: " + target_data
        info['target'] = target_data
        info['model'] = model_build_local(cleaned_data, target_data, p_type, balance_data, steps_dict)
        info['step_dict'] = steps_dict
    elif target_data in drop_features:
        st.error("Selected Target column is also selected to drop.So Can't proceed")
    if info:
        _show_model_results()
        _prediction_section()


def _nlp_section(dataset, cols, target_data, input_feature):
    """NLP text-classification workflow: visualization, train, results, text/CSV prediction."""
    try:
        st.text(" ")
        vis_con = st.beta_expander("Data Visualization")
        st.text(" ")
        vis_con.subheader("Select Input Feature")
        t_cols = ["Select"] + cols
        input_col = vis_con.selectbox(label=' ', options=t_cols)
        st.set_option('deprecation.showPyplotGlobalUse', False)
        cloud_fig = word_cloud_local(dataset, input_col)
        if cloud_fig is not None:
            vis_con.plotly_chart(cloud_fig)
        true_bigrams = plot_ngram_local(dataset, input_col)
        if true_bigrams is not None:
            vis_con.plotly_chart(true_bigrams, config={'displaylogo': False})
        st.text(" ")
        vis_con.subheader("Select target Feature")
        tg_col = vis_con.selectbox(label=' ', options=t_cols)
        plot_res = plotly_target_local(dataset, tg_col)
        if plot_res is not None:
            vis_con.plotly_chart(plot_res, config={'displaylogo': False})
        # Optional handling of an imbalanced target.
        st.text(" ")
        ir_res = imbalnce_ratio(dataset, target_data)
        st.subheader("Select below option to Handle Imbalanced Dataset (optional)")
        st.text("Imbalance Ratio (" + ir_res + ")")
        balance_data = st.selectbox(label=' ', options=["Auto", "False"])
        # Training and result display.
        st.text(" ")
        if st.checkbox("Start Build model") and input_feature != target_data:
            model_info = model_train_local(dataset, input_feature, target_data, balance_data)
            for key, val in model_info.items():
                st.text(" ")
                if key == "Classification Report":
                    st.write(key)
                    st.text(val)
                elif key in ("model_comparison", "ROC Curve"):
                    st.write(key)
                    st.plotly_chart(val, config={'displaylogo': False})
                elif key == "Handling Imbalanced Dataset":
                    st.write(key)
                    for key1, val1 in val.items():
                        st.write(key1)
                        st.text(val1)
                elif key in ("Best pipeline", "tfidf_vector"):
                    pass  # internal artifacts, not user-facing
                else:
                    st.write(key)
                    st.write(val)
            # Prediction on free text and on an uploaded CSV.
            c1, c2 = st.beta_columns(2)
            exp1 = c1.beta_expander("Prediction on text data")
            exp2 = c2.beta_expander("Prediction on csv data")
            form_predict = exp1.form("predict")
            text_val = form_predict.text_area("Enter text for prediction")
            if form_predict.form_submit_button("Predict") and text_val != "":
                prediction = predict_text(text_val, model_info["Best pipeline"],
                                          model_info["tfidf_vector"])
                form_predict.write("Result :" + str(prediction[0]))
            f_up = exp2.file_uploader("predict_csv", type=["csv"])
            if f_up and exp2.button("Predict"):
                df = pd.read_csv(f_up, encoding='ISO-8859-1')
                predictions = predict_csv(df.copy(), model_info["Best pipeline"],
                                          model_info["tfidf_vector"], input_feature)
                df[target_data + "_prediction"] = predictions
                exp2.markdown(_csv_download_link(df), unsafe_allow_html=True)
                exp2.markdown(_model_download_link(model_info["Best pipeline"]),
                              unsafe_allow_html=True)
        elif target_data == input_feature:
            st.error("Input feature and target data cannot be same")
    except Exception as e:
        st.error(e)
        st.error("Something went wrong")


def _supervised_flow(file_upload):
    """Supervised branch: feature selection, problem-type detection, then dispatch."""
    st.subheader("Supervised")
    dataset = pd.read_csv(file_upload)
    cols = dataset.columns.tolist()
    st.text(" ")
    st.subheader("choose the features which you want to drop")
    drop_features = st.multiselect('', cols)
    st.text(" ")
    st.subheader("Pick Your Target feature")
    target_data = st.selectbox(label=' ', options=cols, index=len(cols) - 1)
    # Problem-type detection: exactly one non-target input column of dtype
    # "object" means NLP text classification; otherwise predictive analytics.
    problem_statement = ""
    input_feature_temp = ""
    st.sidebar.text(" ")
    _, badge_col, _ = st.sidebar.beta_columns(3)  # badge renders in the middle column
    if st.checkbox("Check Problem Type"):
        remaining = len(cols) - len(drop_features)
        if target_data not in drop_features and remaining == 2:
            temp_data = dataset.drop(drop_features, axis=1).drop(target_data, axis=1)
            temp_col = temp_data.columns.tolist()
            if temp_data.dtypes[temp_col[0]] == "object":
                badge_col.markdown(_problem_badge("NLP Text Classification"), unsafe_allow_html=True)
                problem_statement = "NLP text Classification"
                input_feature_temp = temp_col[0]
            else:
                badge_col.markdown(_problem_badge("Predictive Analytics"), unsafe_allow_html=True)
                problem_statement = "Predictive Analytics"
        elif target_data not in drop_features and remaining > 2:
            badge_col.markdown(_problem_badge("Predictive Analytics"), unsafe_allow_html=True)
            problem_statement = "Predictive Analytics"
        elif target_data in drop_features:
            st.error("Selected Target column is also selected to drop.So Can't proceed")
    if problem_statement == "Predictive Analytics":
        _predictive_section(dataset, drop_features, target_data)
    elif problem_statement == "NLP text Classification":
        _nlp_section(dataset, cols, target_data, input_feature_temp)


def _unsupervised_flow(file_upload):
    """UnSupervised branch (in progress): EDA plus KMeans elbow-curve exploration."""
    st.subheader("UnSupervised")
    dataset = pd.read_csv(file_upload)
    final_output = get_details_local(dataset)
    cols = dataset.columns.tolist()
    st.text(" ")
    first = dataset.head(10)
    _render_data_analysis(final_output, first)
    # Clustering model selection.
    st.subheader("Select the Model")
    model = st.selectbox(label=' ', options=['Select', 'KMeans'])
    if model == "KMeans":
        st.text(" ")
        st.subheader("choose the features which you want to drop")
        drop_features = st.multiselect('', cols)
        st.text(" ")
        cleaned_data, steps_dict = clean(dataset, drop_features)
        sample_data = cleaned_data.head()
        info['clean_data'] = sample_data
        info['auto_drop'] = steps_dict['auto_drop']
        elbow_fig = k_means(dataset, cols, drop_features, sample_data)
        st.write("Elbow-Curve")
        st.plotly_chart(elbow_fig, config={'displaylogo': False})


def main():
    """Streamlit entry point: page chrome, CSV upload, then the chosen experiment flow."""
    try:
        # Browser-tab title and icon.
        st.set_page_config(page_title="AiNext",
                           page_icon="image.png")
        # Hide the default streamlit menu/footer watermark.
        hide_streamlit_style = """
        <style>
        #MainMenu {visibility: hidden;}
        footer {visibility: hidden;}
        </style>
        """
        st.markdown(hide_streamlit_style, unsafe_allow_html=True)
        # Fixed top navigation bar (bootstrap CSS pulled from CDN).
        st.markdown(
            '<link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/4.0.0/css/bootstrap.min.css" integrity="sha384-Gn5384xqQ1aoWXA+058RXPxPg6fy4IWvTNh0E263XmFcJlSAwiGgFAW/dAiS6JXm" crossorigin="anonymous">',
            unsafe_allow_html=True)
        st.markdown("""
        <nav class="navbar fixed-top navbar-expand-lg navbar-dark" style="background-color: #AED6F1;">
          <a class="navbar-brand" href=""><b><font color = "#8b0000">Ai</font><i style="color:#1997E5 ;">Next</i></b></a>
          <button class="navbar-toggler" type="button" data-toggle="collapse" data-target="#navbarNav" aria-controls="navbarNav" aria-expanded="false" aria-label="Toggle navigation">
            <span class="navbar-toggler-icon"></span>
          </button>
          <div class="collapse navbar-collapse" id="navbarNav">
            <ul class="navbar-nav">
              <li class="nav-item active">
                <a class="nav-link disabled" href="#" style="color:black ;">Home <span class="sr-only">(current)</span></a>
              </li>
              <li class="nav-item">
                <a class="nav-link" href="mailto: technology.coe@digital.datamatics.com" style="color:black ;" target="_blank">Contact Us</a>
              </li>
            </ul>
          </div>
          <div>
            <a style="color:red;" href="https://www.datamatics.com/" target="_blank"><b>DATAMATICS</b></a>
          </div>
        </nav>
        """, unsafe_allow_html=True)
        # Sidebar branding image and contact link.
        sidebar_image = Image.open("AI.jpg")
        st.sidebar.image(sidebar_image, use_column_width=True)
        st.sidebar.markdown(
            """<a class="nav-link" href="mailto: technology.coe@digital.datamatics.com" style="color:white ;" target="_blank">Mail us at - <u>technology.coe@digital.datamatics.com</u></a>""",
            unsafe_allow_html=True)
        # Input dataset upload; nothing else renders until a file is supplied.
        st.header("Upload Input csv file")
        file_upload = st.file_uploader(" ", type=["csv"])
        if file_upload is None:
            return
        st.subheader("Select the Experiment type")
        exp_type = st.selectbox(label=' ', options=['Select', 'Supervised', 'UnSupervised'])
        if exp_type == "Supervised":
            _supervised_flow(file_upload)
        elif exp_type == "UnSupervised":
            _unsupervised_flow(file_upload)
    except Exception as e:
        # Top-level boundary: surface the error in the page rather than crash the app.
        st.header(e)


if __name__ == '__main__':
    main()