"""Gradio demo: diabetes prediction with a Hopsworks-hosted model and SHAP plots.

At import time the script logs into Hopsworks, downloads the registered
``diabetes_model`` pipeline, reads the ``diabetes_gan`` feature group into a
DataFrame, builds the Gradio UI and launches it.
"""

import os

import gradio as gr
import hopsworks
import joblib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import shap
from sklearn.pipeline import make_pipeline

feature_names = ["Age", "BMI", "HbA1c", "Blood Glucose"]

# Column names the model pipeline is fed with (also the user-data primary key).
_MODEL_COLUMNS = ["age", "bmi", "hba1c_level", "blood_glucose_level"]
# Columns compared against the user's values in the bar chart.
_REFERENCE_COLUMNS = ["bmi", "hba1c_level", "blood_glucose_level"]
# Short labels used on the explainability plots.
_SHAP_FEATURE_NAMES = ["age", "bmi", "hba1c", "glucose"]
# Name make_pipeline() gave to the final estimator of the downloaded pipeline.
_CLASSIFIER_STEP = "randomforestclassifier"

project = hopsworks.login(project="SonyaStern_Lab1")
fs = project.get_feature_store()

print("trying to dl model")
mr = project.get_model_registry()
registered_model = mr.get_model("diabetes_model", version=1)
model_dir = registered_model.download()
model = joblib.load(os.path.join(model_dir, "diabetes_model.pkl"))
print("Model downloaded")

diabetes_fg = fs.get_feature_group(name="diabetes_gan", version=1)
query = diabetes_fg.select_all()
feature_view = fs.get_or_create_feature_view(
    name="diabetes_gan",
    version=1,
    description="Read from Diabetes dataset",
    labels=["diabetes"],
    query=query,
)

diabetes_df = pd.DataFrame(diabetes_fg.read())


def _reference_means(reference_df, age):
    """Return mean BMI / HbA1c / blood glucose of non-diabetic rows for *age*.

    Only the three plotted columns are averaged, so unrelated (possibly
    non-numeric) columns cannot break the aggregation. When no non-diabetic
    row has exactly this age, the closest available age is used instead of
    returning an all-NaN result. If there are no usable rows at all the
    returned Series contains NaN.
    """
    non_diabetic = reference_df[reference_df["diabetes"] == 0]
    matching = non_diabetic[non_diabetic["age"] == age]
    if matching.empty:
        known_ages = non_diabetic["age"].dropna()
        if not known_ages.empty:
            distances = (known_ages - age).abs().to_numpy()
            nearest_age = known_ages.iloc[int(distances.argmin())]
            matching = non_diabetic[non_diabetic["age"] == nearest_age]
    return matching[_REFERENCE_COLUMNS].mean()


def _per_class_shap_values(shap_values):
    """Normalise ``TreeExplainer.shap_values`` output to one array per class.

    Older SHAP releases return a list with one (samples, features) array per
    class; newer ones return a single (samples, features, classes) array.
    """
    if isinstance(shap_values, list):
        return shap_values
    values = np.asarray(shap_values)
    if values.ndim != 3:
        raise ValueError(
            f"Expected per-class SHAP values, got array of shape {values.shape}"
        )
    return [values[:, :, k] for k in range(values.shape[2])]


def _new_figure(figures, figsize=None):
    """Create a pyplot figure and record it in *figures* for later clean-up."""
    figure = plt.figure(figsize=figsize)
    figures.append(figure)
    return figure


def _comparison_figure(figures, reference, user_values):
    """Draw the reference-vs-user grouped bar chart and return its figure."""
    fig = _new_figure(figures)
    ax = fig.subplots()

    categories = ["BMI", "HbA1c", "Blood Level"]
    bar_width = 0.35
    indices = np.arange(len(categories))

    ax.bar(
        indices,
        [reference[column] for column in _REFERENCE_COLUMNS],
        bar_width,
        label="Reference",
        color="b",
        alpha=0.7,
    )
    ax.bar(
        indices + bar_width,
        user_values,
        bar_width,
        label="User",
        color="r",
        alpha=0.7,
    )

    ax.legend()
    ax.set_xlabel("Variables")
    ax.set_ylabel("Values")
    ax.set_title("Comparison with average non-diabetic values for your age")
    # Centre each tick label between the two bars of its group.
    ax.set_xticks(indices + bar_width / 2)
    ax.set_xticklabels(categories)
    return fig


def _explanation_figures(figures, df):
    """Build the waterfall, summary, importance and decision figures for *df*.

    *df* is the single-row input frame. Returns the four figures in the order
    the UI outputs expect them.
    """
    rf_classifier = model.named_steps[_CLASSIFIER_STEP]
    # Re-assemble every step except the classifier so the raw inputs can be
    # transformed exactly as the classifier sees them.
    transformer_pipeline = make_pipeline(
        *[
            step
            for name, step in model.named_steps.items()
            if name != _CLASSIFIER_STEP
        ]
    )
    transformed_df = transformer_pipeline.transform(df)

    explainer = shap.TreeExplainer(rf_classifier)
    shap_values = _per_class_shap_values(explainer.shap_values(transformed_df))

    # Positional index of the predicted class. Using the predicted *label* as
    # an index only works when the labels happen to be 0..n-1.
    class_index = int(np.argmax(rf_classifier.predict_proba(transformed_df)[0]))
    class_shap_values = shap_values[class_index]
    base_value = explainer.expected_value[class_index]

    explanation = shap.Explanation(
        values=class_shap_values[0],
        base_values=base_value,
        data=df.iloc[0],
        feature_names=_SHAP_FEATURE_NAMES,
    )

    # show=False on every SHAP call: the figures are handed to Gradio, so SHAP
    # must draw on the current figure without calling plt.show().
    waterfall_fig = _new_figure(figures, figsize=(3, 3))
    waterfall_fig.tight_layout()
    plt.gca().set_position((0, 0, 1, 1))
    plt.title("SHAP Waterfall Plot")
    plt.tight_layout()
    plt.tick_params(axis="y", labelsize=3)
    shap.waterfall_plot(explanation, show=False)

    summary_fig = _new_figure(figures, figsize=(3, 3))
    plt.title("SHAP Summary Plot")
    shap.summary_plot(
        shap_values,
        features=transformed_df,
        feature_names=_SHAP_FEATURE_NAMES,
        show=False,
    )

    importance_fig = _new_figure(figures, figsize=(4, 3))
    plt.title("Feature Importances")
    sns.barplot(x=rf_classifier.feature_importances_, y=_SHAP_FEATURE_NAMES)

    decision_fig = _new_figure(figures, figsize=(3, 3))
    # NOTE(review): the title says "Interaction" but this is a decision plot;
    # left unchanged because it is user-facing text.
    plt.title("SHAP Interaction Plot")
    shap.decision_plot(base_value, class_shap_values, df.iloc[0], show=False)

    return waterfall_fig, summary_fig, importance_fig, decision_fig


def _store_user_data(df, known_status):
    """Insert the submitted row, plus the self-reported status, into Hopsworks."""
    user_data_fg = fs.get_or_create_feature_group(
        name="user_diabetes_data",
        version=1,
        primary_key=_MODEL_COLUMNS,
        description="Submitted user data",
    )
    user_data_df = df.copy()
    user_data_df["diabetes"] = known_status
    user_data_fg.insert(user_data_df)
    print("inserted new user data to hopsworks", user_data_df)


def submit_inputs(
    age_input,
    bmi_input,
    hba1c_input,
    blood_glucose_input,
    existent_info_input,
    consent_input,
):
    """Handle a form submission: predict, build the plots, optionally store data.

    Args:
        age_input: Age entered in the number box (``None`` when left empty).
        bmi_input: Body mass index from the slider.
        hba1c_input: HbA1c level from the slider.
        blood_glucose_input: Blood glucose level from the slider.
        existent_info_input: Self-reported diabetes status; stored with the
            submission when consent is given, never used for the prediction.
        consent_input: Whether the user agreed to have the data saved.

    Returns:
        A 6-tuple ``(prediction, comparison_fig, waterfall_fig, summary_fig,
        importance_fig, decision_fig)`` matching the click handler's outputs.

    Raises:
        gr.Error: If no age was entered.
    """
    if age_input is None:
        raise gr.Error("Please enter your age before submitting.")

    df = pd.DataFrame(
        [[age_input, bmi_input, hba1c_input, blood_glucose_input]],
        columns=_MODEL_COLUMNS,
    )
    res = model.predict(df)

    mean_for_age = _reference_means(diabetes_df, age_input)
    print(
        "your bmi is:", bmi_input, "the mean for ur age is :", mean_for_age["bmi"]
    )

    figures = []
    try:
        fig = _comparison_figure(
            figures, mean_for_age, [bmi_input, hba1c_input, blood_glucose_input]
        )
        fig2, fig3, fig4, fig5 = _explanation_figures(figures, df)

        if consent_input:
            _store_user_data(df, existent_info_input)
    finally:
        # Unregister the figures from pyplot so they do not accumulate across
        # requests; the Figure objects themselves stay renderable.
        for figure in figures:
            plt.close(figure)

    return res, fig, fig2, fig3, fig4, fig5


with gr.Blocks() as demo:
    with gr.Row():
        # NOTE(review): the markup around this title was lost in the source;
        # an <h1> heading is assumed - confirm against the original file.
        gr.HTML(value="<h1>Diabetes prediction</h1>")
    with gr.Row():
        with gr.Column():
            age_input = gr.Number(label="age")
            bmi_input = gr.Slider(10, 100, label="bmi", info="Body Mass Index")
            hba1c_input = gr.Slider(
                3.5, 9, label="hba1c_level", info="Glycated Haemoglobin"
            )
            blood_glucose_input = gr.Slider(
                80, 300, label="blood_glucose_level", info="Blood Glucose Level"
            )
            existent_info_input = gr.Radio(
                ["yes", "no", "Don't know"],
                label="Do you already know if you have diabetes? (This will not be used for the prediction)",
            )
            consent_input = gr.Checkbox(
                info="I consent that my personal data will be saved and potentially be used for the model training",
                label="accept",
            )
            btn = gr.Button("Submit")
        with gr.Column():
            with gr.Row():
                output = gr.Text(label="Model prediction")
            with gr.Row():
                mean_plot = gr.Plot()
    with gr.Row():
        with gr.Accordion("See model explanability", open=False):
            with gr.Row():
                with gr.Column():
                    waterfall_plot = gr.Plot()
                with gr.Column():
                    summary_plot = gr.Plot()
            with gr.Row():
                with gr.Column():
                    importance_plot = gr.Plot()
                with gr.Column():
                    decision_plot = gr.Plot()

    btn.click(
        submit_inputs,
        inputs=[
            age_input,
            bmi_input,
            hba1c_input,
            blood_glucose_input,
            existent_info_input,
            consent_input,
        ],
        outputs=[
            output,
            mean_plot,
            waterfall_plot,
            summary_plot,
            importance_plot,
            decision_plot,
        ],
    )

demo.launch()