"""Streamlit app that predicts Palmer Penguin species.

Features come either from an uploaded CSV file (batch prediction) or from
sidebar widgets (single prediction).  Predictions are produced by a
pre-trained classifier unpickled from ``penguins_clf.pkl``.
"""
import base64
import pickle

import numpy as np
import pandas as pd
import seaborn as sns
import streamlit as st
from sklearn.ensemble import RandomForestClassifier  # noqa: F401  (kept from the original import list)
from sklearn.preprocessing import LabelEncoder

MODEL_PATH = 'penguins_clf.pkl'

# Index i is the label shown for predicted class i.
# NOTE(review): assumes the pickled model predicts integer codes 0..2 in this
# order -- confirm against the training script.
PENGUIN_SPECIES = np.array(['Adelie', 'Chinstrap', 'Gentoo'])

# LabelEncoder assigns codes in sorted order, so fitting on this fixed list
# always gives female -> 0, male -> 1 regardless of which values the input
# happens to contain.
# NOTE(review): presumably the training data was label-encoded the same way;
# verify against the training script.
SEX_CATEGORIES = ['female', 'male']


def load_classifier(path=MODEL_PATH):
    """Unpickle and return the classifier stored at *path*.

    The file is opened in a ``with`` block so the handle is closed even when
    unpickling fails.
    """
    # SECURITY: pickle executes arbitrary code while loading; only ever point
    # this at a model file you created yourself.
    with open(path, 'rb') as model_file:
        return pickle.load(model_file)


def encode_sex(frame):
    """Replace the ``sex`` column of *frame* with stable integer codes, in place.

    Fitting the encoder on the input itself (as the previous version did)
    makes the codes depend on the input: a single-row frame always encodes
    to 0, whatever the sex.  Fitting on ``SEX_CATEGORIES`` keeps them fixed.

    Raises:
        KeyError: if *frame* has no ``sex`` column.
        ValueError: if the column holds a value outside ``SEX_CATEGORIES``.
    """
    encoder = LabelEncoder().fit(SEX_CATEGORIES)
    frame['sex'] = encoder.transform(frame['sex'])
    return frame


def filedownload(df):
    """Return an HTML anchor that downloads *df* as a CSV file.

    The CSV text is embedded in the link as a base64 ``data:`` URI, so no
    server-side file is needed.
    """
    csv = df.to_csv(index=False)
    b64 = base64.b64encode(csv.encode()).decode()  # strings <-> bytes conversions
    return (
        f'<a href="data:text/csv;base64,{b64}" '
        f'download="penguin_predictions.csv">Download CSV File</a>'
    )


def user_input_features():
    """Collect one penguin's features from sidebar widgets as a one-row DataFrame."""
    sex = st.sidebar.selectbox('Sex', ('male', 'female'))
    bill_length_mm = st.sidebar.slider('Bill length (mm)', 32.1, 59.6, 43.9)
    bill_depth_mm = st.sidebar.slider('Bill depth (mm)', 13.1, 21.5, 17.2)
    flipper_length_mm = st.sidebar.slider('Flipper length (mm)', 172.0, 231.0, 201.0)
    body_mass_g = st.sidebar.slider('Body mass (g)', 2700.0, 6300.0, 4207.0)
    data = {
        'bill_length_mm': bill_length_mm,
        'bill_depth_mm': bill_depth_mm,
        'flipper_length_mm': flipper_length_mm,
        'body_mass_g': body_mass_g,
        'sex': sex,
    }
    return pd.DataFrame(data, index=[0])


def show_uploaded_predictions(df):
    """Predict the species for every row of an uploaded frame and render the results."""
    st.dataframe(df)

    try:
        encode_sex(df)
    except (KeyError, ValueError) as err:
        # Report bad input in the page instead of crashing with a traceback.
        st.error(
            "The uploaded CSV needs a 'sex' column containing only "
            f"{SEX_CATEGORIES}: {err}"
        )
        return

    load_clf = load_classifier()
    prediction = load_clf.predict(df)
    prediction_proba = load_clf.predict_proba(df)

    st.subheader('Prediction')
    pp = pd.DataFrame(PENGUIN_SPECIES[prediction], columns=["prediction"])
    st.write(pp)

    st.subheader('Prediction Probability')
    st.dataframe(prediction_proba)

    ndf = pd.concat([df, pp], axis=1)
    st.write(ndf)

    # sns.barplot returns an Axes, while st.pyplot expects the Figure that
    # owns it.  clear_figure=True stops later reruns of the script from
    # drawing on top of this plot.
    axes = sns.barplot(x="bill_length_mm", y="bill_depth_mm", data=df)
    st.pyplot(axes.get_figure(), clear_figure=True)

    st.markdown(filedownload(ndf), unsafe_allow_html=True)


def show_manual_prediction():
    """Predict the species for the single penguin described in the sidebar."""
    st.sidebar.title("Manual Feature input")
    input_df = user_input_features()

    st.subheader('User Input features')
    st.write('Awaiting CSV file to be uploaded. Currently using example input parameters (shown below).')
    st.write(input_df)

    encode_sex(input_df)

    load_clf = load_classifier()
    prediction = load_clf.predict(input_df)
    prediction_proba = load_clf.predict_proba(input_df)

    st.subheader('Prediction')
    st.write(PENGUIN_SPECIES[prediction])

    st.subheader('Prediction Probability')
    st.write(prediction_proba)


st.write("""
# Penguin Prediction App

This app predicts the **Palmer Penguin** species!

Data obtained from the [palmerpenguins library](https://github.com/allisonhorst/palmerpenguins) in R by Allison Horst.
""")

st.sidebar.title('File Upload Features')

# Collects user input features into dataframe
uploaded_file = st.sidebar.file_uploader("Upload your input CSV file", type=["csv"])

if uploaded_file is not None:
    show_uploaded_predictions(pd.read_csv(uploaded_file))
else:
    show_manual_prediction()