CDCHealth / app.py
Beladiaamy's picture
Update app.py
b825e19 verified
raw
history blame
8.21 kB
import pickle
import pandas as pd
import shap
from shap.plots._force_matplotlib import draw_additive_plot
import gradio as gr
import numpy as np
import matplotlib.pyplot as plt
# Load the trained classifier from disk.
# The context manager guarantees the file handle is closed once unpickling
# finishes (the bare open() call left it to the garbage collector).
# NOTE: pickle can execute arbitrary code while loading, so db_xgb.pkl must
# only ever come from a trusted source.
with open("db_xgb.pkl", 'rb') as model_file:
    loaded_model = pickle.load(model_file)

# Setup SHAP
explainer = shap.Explainer(loaded_model) # PLEASE DO NOT CHANGE THIS.
# Define mapping functions
def map_HighBP(value):
    """Encode the high-blood-pressure answer as an integer.

    'No' becomes 0 and 'Yes' becomes 1; any other value raises KeyError.
    """
    return {'No': 0, 'Yes': 1}[value]
def map_HighChol(value):
    """Encode the high-cholesterol answer as an integer.

    'No' becomes 0 and 'Yes' becomes 1; any other value raises KeyError.
    """
    return {'No': 0, 'Yes': 1}[value]
def map_CholCheck(value):
    """Encode the cholesterol-check answer as an integer.

    'No' becomes 0 and 'Yes' becomes 1; any other value raises KeyError.
    """
    return {'No': 0, 'Yes': 1}[value]
def map_Smoker(value):
    """Encode the smoker answer as an integer.

    'No' becomes 0 and 'Yes' becomes 1; any other value raises KeyError.
    """
    return {'No': 0, 'Yes': 1}[value]
def map_Stroke(value):
    """Encode the stroke-history answer as an integer.

    'No' becomes 0 and 'Yes' becomes 1; any other value raises KeyError.
    """
    return {'No': 0, 'Yes': 1}[value]
def map_HeartDiseaseorAttack(value):
    """Encode the heart-disease-or-attack answer as an integer.

    'No' becomes 0 and 'Yes' becomes 1; any other value raises KeyError.
    """
    return {'No': 0, 'Yes': 1}[value]
def map_PhysActivity(value):
    """Encode the physical-activity answer as an integer.

    'No' becomes 0 and 'Yes' becomes 1; any other value raises KeyError.
    """
    return {'No': 0, 'Yes': 1}[value]
def map_Fruits(value):
    """Encode the daily-fruit-consumption answer as an integer.

    'No' becomes 0 and 'Yes' becomes 1; any other value raises KeyError.
    """
    return {'No': 0, 'Yes': 1}[value]
def map_Veggies(value):
    """Encode the daily-vegetable-consumption answer as an integer.

    'No' becomes 0 and 'Yes' becomes 1; any other value raises KeyError.
    """
    return {'No': 0, 'Yes': 1}[value]
def map_HvyAlcoholConsump(value):
    """Encode the heavy-alcohol-consumption answer as an integer.

    'No' becomes 0 and 'Yes' becomes 1; any other value raises KeyError.
    """
    return {'No': 0, 'Yes': 1}[value]
def map_AnyHealthcare(value):
    """Encode the health-care-coverage answer as an integer.

    'No' becomes 0 and 'Yes' becomes 1; any other value raises KeyError.
    """
    return {'No': 0, 'Yes': 1}[value]
def map_NoDocbcCost(value):
    """Encode the "could not see a doctor because of cost" answer.

    'No' becomes 0 and 'Yes' becomes 1; any other value raises KeyError.
    """
    return {'No': 0, 'Yes': 1}[value]
def map_DiffWalk(value):
    """Encode the difficulty-walking answer as an integer.

    'No' becomes 0 and 'Yes' becomes 1; any other value raises KeyError.
    """
    return {'No': 0, 'Yes': 1}[value]
def map_Sex(value):
    """Encode the sex selection as an integer.

    'Female' becomes 0 and 'Male' becomes 1; any other value raises KeyError.
    """
    return {'Female': 0, 'Male': 1}[value]
def map_Education(value):
    """Translate an education-level label into its integer code.

    The labels are listed in ascending order and each one is assigned its
    position in that list (0 through 5). A label that is not in the list
    raises KeyError.
    """
    levels = (
        "Never attended school",
        "Grades 1-8",
        "Grades 9-11",
        "Grade 12 or GED",
        "College 1-3 years",
        "College 4+ years",
    )
    codes = {label: code for code, label in enumerate(levels)}
    return codes[value]
def map_Income(value):
    """Translate an income-bracket label into its integer code.

    The brackets are listed from lowest to highest and each one is assigned
    its position in that list (0 through 4). A label that is not in the list
    raises KeyError.
    """
    brackets = (
        "< $10,000",
        "$10,000 - $24,999",
        "$25,000 - $49,999",
        "$50,000 - $74,999",
        "$75,000 or more",
    )
    codes = {label: code for code, label in enumerate(brackets)}
    return codes[value]
# Create the main function for server
def main_func(HighBP, HighChol, CholCheck, BMI, Smoker, Stroke, HeartDiseaseorAttack, PhysActivity, Fruits, Veggies, HvyAlcoholConsump, AnyHealthcare, NoDocbcCost, GenHlth, MentHlth, PhysHlth, DiffWalk, Sex, Age, Education, Income):
    """Score one set of form answers and build a SHAP explanation for it.

    Categorical answers are converted to integer codes through the matching
    ``map_*`` helper; BMI, GenHlth, MentHlth, PhysHlth and Age are passed
    through unchanged. The 21 values are assembled into a one-row DataFrame
    that is fed both to the model and to the SHAP explainer.

    Returns:
        A 2-tuple ``(scores, figure)``:

        * ``scores`` is a dict with the keys "Low Chance of Diabetes"
          (``predict_proba`` column 0 for the row) and
          "High Chance of Diabetes" (one minus that value).
        * ``figure`` is the matplotlib figure on which the SHAP bar plot for
          the row was drawn.

    Raises:
        KeyError: if a categorical answer is not a key of its mapping.
    """
    features = {
        'HighBP': map_HighBP(HighBP),
        'HighChol': map_HighChol(HighChol),
        'CholCheck': map_CholCheck(CholCheck),
        'BMI': BMI,
        'Smoker': map_Smoker(Smoker),
        'Stroke': map_Stroke(Stroke),
        'HeartDiseaseorAttack': map_HeartDiseaseorAttack(HeartDiseaseorAttack),
        'PhysActivity': map_PhysActivity(PhysActivity),
        'Fruits': map_Fruits(Fruits),
        'Veggies': map_Veggies(Veggies),
        'HvyAlcoholConsump': map_HvyAlcoholConsump(HvyAlcoholConsump),
        'AnyHealthcare': map_AnyHealthcare(AnyHealthcare),
        'NoDocbcCost': map_NoDocbcCost(NoDocbcCost),
        'GenHlth': GenHlth,
        'MentHlth': MentHlth,
        'PhysHlth': PhysHlth,
        'DiffWalk': map_DiffWalk(DiffWalk),
        'Sex': map_Sex(Sex),
        'Age': Age,
        'Education': map_Education(Education),
        'Income': map_Income(Income),
    }
    # orient='index' produces one row per feature; transposing turns that
    # into a single row with one column per feature.
    by_feature = pd.DataFrame.from_dict(features, orient='index')
    row_df = by_feature.transpose()

    prob = loaded_model.predict_proba(row_df)
    shap_values = explainer(row_df)

    # show=False keeps the plot on the current pyplot figure so that it can
    # be picked up with gcf() below instead of being displayed.
    shap.plots.bar(
        shap_values[0],
        max_display=6,
        order=shap.Explanation.abs,
        show_data='auto',
        show=False,
    )
    plt.tight_layout()
    figure = plt.gcf()
    # Unregister the figure from pyplot; the Figure object itself is still
    # returned to the caller.
    plt.close()

    low_chance = float(prob[0][0])
    scores = {
        "Low Chance of Diabetes": low_chance,
        "High Chance of Diabetes": 1 - low_chance,
    }
    return scores, figure
# Create the UI
# Text shown at the top of the page.
# NOTE(review): the emoji below were restored from mis-decoded (mojibake)
# text. The one in description2 decodes unambiguously to the crossed-fingers
# emoji; the one in the title had lost a byte, so the cookie is a best guess
# and should be confirmed.
title = "**Diabetes Predictor Application** 🍪"
description1 = """This app takes information from subjects and predicts their diabetes likelihood. Do not use for medical diagnosis."""
description2 = """
To use the app, click on one of the examples, or adjust the values of the factors, and click on Analyze. 🤞
"""
# Build the page: header text, one widget per model feature, the Analyze
# button, the two outputs, and a pair of clickable examples.
with gr.Blocks(title=title) as demo:
    gr.Markdown(f"## {title}")
    gr.Markdown(description1)
    gr.Markdown("""---""")
    gr.Markdown(description2)
    gr.Markdown("""---""")

    with gr.Row():
        CholCheck = gr.Radio(label="Did you check your cholesterol in the past 5 years?", choices=["No", "Yes"])
        HighChol = gr.Radio(label="Do you have high cholesterol?", choices=["No", "Yes"])
    with gr.Row():
        DiffWalk = gr.Radio(label="Do you have serious difficulty walking or climbing stairs?", choices=["No", "Yes"])
        BMI = gr.Number(label="BMI", minimum=0, maximum=98)
    with gr.Row():
        Smoker = gr.Radio(label="Are you a smoker?", choices=["No", "Yes"])
        HvyAlcoholConsump = gr.Radio(label="Do you drink often?", choices=["No", "Yes"])
    with gr.Row():
        Stroke = gr.Radio(label="Have you had a stroke?", choices=["No", "Yes"])
        HighBP = gr.Radio(label="Do you have high blood pressure?", choices=["No", "Yes"])
        HeartDiseaseorAttack = gr.Radio(label="Do you have coronary heart disease or myocardial infarction?", choices=["No", "Yes"])
    with gr.Row():
        PhysActivity = gr.Radio(label="Did you partake in physical activity in the past 30 days?", choices=["No", "Yes"])
        Fruits = gr.Radio(label="Do you consume fruit 1 or more times per day?", choices=["No", "Yes"])
        Veggies = gr.Radio(label="Do you consume vegetables 1 or more times per day?", choices=["No", "Yes"])
    with gr.Row():
        AnyHealthcare = gr.Radio(label="Do you have any kind of health care coverage?", choices=["No", "Yes"])
        NoDocbcCost = gr.Radio(label="Was there a time in the past 12 months when you needed to see a doctor but could not because of cost?", choices=["No", "Yes"])
    with gr.Row():
        # Both questions ask for a number of days out of the past 30, so the
        # accepted range is bounded accordingly.
        MentHlth = gr.Number(label="How many days in the past 30 days did you have poor mental health?", minimum=0, maximum=30)
        PhysHlth = gr.Number(label="How many days in the past 30 days did you have poor physical health?", minimum=0, maximum=30)
        # NOTE(review): no minimum/maximum is given, so the slider falls back
        # to Gradio's default range -- confirm that this matches the GenHlth
        # scale the model was trained on.
        GenHlth = gr.Slider(label="In general, rank your overall health on a scale:", step=1)
    with gr.Row():
        Sex = gr.Dropdown(label="Sex", choices=["Female", "Male"])
        Age = gr.Number(label="Age")
    with gr.Row():
        Education = gr.Dropdown(label="Education Level", choices=["Never attended school", "Grades 1-8", "Grades 9-11", "Grade 12 or GED", "College 1-3 years", "College 4+ years"])
        Income = gr.Dropdown(label="Income Level", choices=["< $10,000", "$10,000 - $24,999", "$25,000 - $49,999", "$50,000 - $74,999", "$75,000 or more"])

    submit_btn = gr.Button("Analyze")

    # A single output column: a second "Predicted Label" component created
    # before the button was never wired to anything and only rendered an
    # empty box.
    with gr.Column(visible=True) as output_col:
        label = gr.Label(label = "Predicted Label")
        local_plot = gr.Plot(label = 'Shap:')

    # Order must match the positional parameters of main_func.
    inputs = [
        HighBP, HighChol, CholCheck, BMI, Smoker, Stroke, HeartDiseaseorAttack,
        PhysActivity, Fruits, Veggies, HvyAlcoholConsump, AnyHealthcare,
        NoDocbcCost, GenHlth, MentHlth, PhysHlth, DiffWalk, Sex, Age,
        Education, Income,
    ]
    outputs = [label, local_plot]

    submit_btn.click(main_func, inputs, outputs, api_name="Diabetes Predictor")

    gr.Markdown("### Click on any of the examples below to see how it works:")
    # cache_examples=True makes Gradio run main_func on every example row, so
    # each value has to be valid for its widget. In particular Sex must be
    # one of the Dropdown choices, otherwise map_Sex raises KeyError.
    gr.Examples(
        examples=[
            ["No", "No", "Yes", 22, "No", "No", "No", "Yes", "Yes", "Yes", "No", "No", "Yes", 3, 25, 23, "No", "Female", 22, "Grade 12 or GED", "$25,000 - $49,999"],
            ["Yes", "Yes", "Yes", 30, "Yes", "Yes", "Yes", "No", "No", "No", "Yes", "Yes", "No", 2, 20, 23, "No", "Male", 21, "College 4+ years", "$75,000 or more"],
        ],
        inputs=inputs,
        outputs=outputs,
        fn=main_func,
        cache_examples=True,
    )

demo.launch()