automl-pycaret / app.py
harikrishnad1997's picture
Upload 3 files
1bd8d17 verified
from operator import index
import streamlit as st
import plotly.express as px
import numpy as np
from pycaret.regression import setup, compare_models, pull, save_model, load_model, plot_model
# from lazypredict.Supervised import LazyRegressor
# from sklearn.model_selection import train_test_split
from pandas_profiling import ProfileReport
import pandas as pd
from streamlit_pandas_profiling import st_profile_report
import os
@st.cache
def load_data():
return pd.read_csv('dataset.csv', index_col=None)
# Rest of your code...
if os.path.exists('./dataset.csv'):
df = load_data()
with st.sidebar:
st.image("https://michael-fuchs-python.netlify.app/post/2022-01-01-automl-using-pycaret-classification_files/p133s1.png")
st.title("AutoML")
choice = st.radio("Navigation", ["Upload","Profiling","Modelling", "Download"])
st.info("This project application helps you build and explore your data.")
if choice == "Upload":
st.title("Upload Your Dataset")
file = st.file_uploader("Upload Your Dataset")
if file:
df = pd.read_csv(file, index_col=None)
df.to_csv('dataset.csv', index=None)
st.dataframe(df)
if choice == "Profiling":
st.title("Exploratory Data Analysis")
profile_df = df.profile_report()
st_profile_report(profile_df)
if choice == "Modelling":
chosen_target = st.selectbox('Choose the Target Column', df.columns)
if st.button('Run Modelling'):
print("WIP")
# h2o.init()
# df = h2o.import_file(df)
# df.describe(chunk_summary=True)
# train, test = df.split_frame(ratios=[0.8], seed = 1)
# aml = H2OAutoML(max_models =25,
# balance_classes=True,
# seed =16548846)
# aml.train(training_frame = train, y = 'y')
# lb = aml.leaderboard
# lb.head(rows=lb.nrows)
# aml.train(training_frame = train, y = 'y', leaderboard_frame = my_leaderboard_frame)
# best_model = aml.get_best_model()
# model_path = h2o.save_model(model=best_model,force=True)
setup(df.dropna(subset=chosen_target), target=chosen_target, session_id = 2774764,imputation_type = 'simple',numeric_imputation='mean',categorical_imputation='mode')
setup_df = pull()
st.dataframe(setup_df)
best_model = compare_models(n_select = 5)
compare_df = pull()
st.dataframe(compare_df)
plot_model(best_model, plot='residuals', display_format='streamlit')
plot_model(best_model, plot='feature', display_format='streamlit')
plot_model(best_model, plot='error', display_format='streamlit')
save_model(best_model, 'best_model')
# y = df[chosen_target]
# X = df.loc[:, df.columns!=chosen_target]
# X_train, X_test, y_train, y_test = train_test_split(X, y,test_size=.5,random_state =65481254)
# reg = LazyRegressor(verbose=0,ignore_warnings=False, custom_metric=None )
# models,predictions = reg.fit(X_train, X_test, y_train, y_test)
# st.dataframe(models)
# model_dictionary = reg.provide_models(X_train,X_test,y_train,y_test)
if choice == "Download":
print("Working")
with open('best_model.pkl', 'rb') as f:
st.download_button('Download Model', f, file_name="best_model.pkl")