import gradio as gr
import matplotlib.pyplot as plt
import numpy as np
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB


def choose_model(model):
    if model == "Logistic Regression":
        return LogisticRegression(max_iter=1000, random_state=123)
    elif model == "Random Forest":
        return RandomForestClassifier(n_estimators=100, random_state=123)
    elif model == "Gaussian Naive Bayes":
        return GaussianNB()
    else:
        raise ValueError("Model is not supported.")


def get_proba_plots(
    model_1, model_2, model_3, model_1_weight, model_2_weight, model_3_weight
):
    clf1 = choose_model(model_1)
    clf2 = choose_model(model_2)
    clf3 = choose_model(model_3)

    # toy dataset with two features and two classes
    X = np.array([[-1.0, -1.0], [-1.2, -1.4], [-3.4, -2.2], [1.1, 1.2]])
    y = np.array([1, 1, 2, 2])

    eclf = VotingClassifier(
        estimators=[("clf1", clf1), ("clf2", clf2), ("clf3", clf3)],
        voting="soft",
        weights=[model_1_weight, model_2_weight, model_3_weight],
    )

    # predict class probabilities for all classifiers
    probas = [c.fit(X, y).predict_proba(X) for c in (clf1, clf2, clf3, eclf)]

    # get class probabilities for the first sample in the dataset
    class1_1 = [pr[0, 0] for pr in probas]
    class2_1 = [pr[0, 1] for pr in probas]

    # plotting
    N = 4  # number of groups
    ind = np.arange(N)  # group positions
    width = 0.35  # bar width

    fig, ax = plt.subplots()

    # bars for classifiers 1-3
    p1 = ax.bar(
        ind, np.hstack((class1_1[:-1], [0])), width, color="green", edgecolor="k"
    )
    p2 = ax.bar(
        ind + width,
        np.hstack((class2_1[:-1], [0])),
        width,
        color="lightgreen",
        edgecolor="k",
    )

    # bars for the VotingClassifier
    ax.bar(ind, [0, 0, 0, class1_1[-1]], width, color="blue", edgecolor="k")
    ax.bar(
        ind + width, [0, 0, 0, class2_1[-1]], width, color="steelblue", edgecolor="k"
    )

    # plot annotations
    plt.axvline(2.8, color="k", linestyle="dashed")
    ax.set_xticks(ind + width)
    ax.set_xticklabels(
        [
            f"{model_1}\nweight {model_1_weight}",
            f"{model_2}\nweight {model_2_weight}",
            f"{model_3}\nweight {model_3_weight}",
            "VotingClassifier\n(average probabilities)",
        ],
        rotation=40,
        ha="right",
    )
    plt.ylim([0, 1])
    plt.title("Class probabilities for sample 1 by different classifiers")
    plt.legend([p1[0], p2[0]], ["class 1", "class 2"], loc="upper left")
    plt.tight_layout()
    plt.show()
    return fig


with gr.Blocks() as demo:
    gr.Markdown(
        """
# Class probabilities by the `VotingClassifier`

This space shows how the weights assigned to individual classifiers affect the predictions of sklearn's
[VotingClassifier](https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.VotingClassifier.html#sklearn.ensemble.VotingClassifier).

For example, suppose you set the weights as in the table below, and the models predict the following probabilities:

|         | Weights | Predicted Probabilities |
|---------|:-------:|:-----------------------:|
| Model 1 | 1       | 0.5                     |
| Model 2 | 2       | 0.8                     |
| Model 3 | 5       | 0.9                     |

The probability predicted by the `VotingClassifier` is then the weighted average
$(1*0.5 + 2*0.8 + 5*0.9) / (1 + 2 + 5) = 0.825$.

You can experiment with different model types and weights and see their effect on the `VotingClassifier`'s prediction.

This space is based on [sklearn's original demo](https://scikit-learn.org/stable/auto_examples/ensemble/plot_voting_probas.html#sphx-glr-auto-examples-ensemble-plot-voting-probas-py).
        """
    )
    with gr.Row():
        with gr.Column(scale=3):
            with gr.Row():
                model_1 = gr.Dropdown(
                    [
                        "Logistic Regression",
                        "Random Forest",
                        "Gaussian Naive Bayes",
                    ],
                    label="Model 1",
                    value="Logistic Regression",
                )
                model_1_weight = gr.Slider(
                    value=1, label="Model 1 Weight", maximum=10, step=1
                )
            with gr.Row():
                model_2 = gr.Dropdown(
                    [
                        "Logistic Regression",
                        "Random Forest",
                        "Gaussian Naive Bayes",
                    ],
                    label="Model 2",
                    value="Random Forest",
                )
                model_2_weight = gr.Slider(
                    value=1, label="Model 2 Weight", maximum=10, step=1
                )
            with gr.Row():
                model_3 = gr.Dropdown(
                    [
                        "Logistic Regression",
                        "Random Forest",
                        "Gaussian Naive Bayes",
                    ],
                    label="Model 3",
                    value="Gaussian Naive Bayes",
                )
                model_3_weight = gr.Slider(
                    value=5, label="Model 3 Weight", maximum=10, step=1
                )
        with gr.Column(scale=4):
            proba_plots = gr.Plot()

    inputs = [
        model_1,
        model_2,
        model_3,
        model_1_weight,
        model_2_weight,
        model_3_weight,
    ]

    # redraw the plot whenever any model choice or weight changes
    for component in inputs:
        component.change(get_proba_plots, inputs, proba_plots, queue=False)

    # draw the initial plot when the app loads
    demo.load(get_proba_plots, inputs, proba_plots, queue=False)

if __name__ == "__main__":
    demo.launch()