Spaces:

sklearn-docs
/

voting-classifier-plots

Sleeping

App Files Files Community

voting-classifier-plots / app.py

cmpatino

Add better description of the space

9d640ed over 1 year ago

raw

history blame

6.75 kB

	import gradio as gr
	import matplotlib.pyplot as plt
	import numpy as np
	from sklearn.ensemble import RandomForestClassifier, VotingClassifier
	from sklearn.linear_model import LogisticRegression
	from sklearn.naive_bayes import GaussianNB


	def choose_model(model):
	    """Return a new, unfitted classifier for a model display name.

	    Args:
	        model: One of "Logistic Regression", "Random Forest" or
	            "Gaussian Naive Bayes".

	    Returns:
	        A freshly constructed estimator. The logistic regression and the
	        random forest are created with ``random_state=123``.

	    Raises:
	        ValueError: If ``model`` is not one of the supported names.
	    """
	    if model == "Logistic Regression":
	        return LogisticRegression(max_iter=1000, random_state=123)
	    if model == "Random Forest":
	        return RandomForestClassifier(n_estimators=100, random_state=123)
	    if model == "Gaussian Naive Bayes":
	        return GaussianNB()
	    raise ValueError("Model is not supported.")


	def get_proba_plots(
	    model_1, model_2, model_3, model_1_weight, model_2_weight, model_3_weight
	):
	    """Plot the first sample's class probabilities per model and for the ensemble.

	    Three classifiers are built with ``choose_model``, combined in a
	    soft-voting ``VotingClassifier`` using the given weights, and all four
	    estimators are fitted on a fixed four-sample toy dataset. The bar chart
	    shows, for the first sample, the predicted probability of each class
	    for every estimator.

	    Args:
	        model_1, model_2, model_3: Model display names accepted by
	            ``choose_model``.
	        model_1_weight, model_2_weight, model_3_weight: Weights passed to
	            the ``VotingClassifier`` for the corresponding model.

	    Returns:
	        The matplotlib figure containing the bar chart.

	    Raises:
	        gr.Error: If all three weights are zero.
	        ValueError: Propagated from ``choose_model`` for an unsupported name.
	    """
	    weights = [model_1_weight, model_2_weight, model_3_weight]
	    # A weighted average is undefined when every weight is zero; report that
	    # as a readable UI error instead of letting the ensemble fail internally.
	    if all(weight == 0 for weight in weights):
	        raise gr.Error("At least one model weight must be greater than zero.")

	    clf1 = choose_model(model_1)
	    clf2 = choose_model(model_2)
	    clf3 = choose_model(model_3)
	    X = np.array([[-1.0, -1.0], [-1.2, -1.4], [-3.4, -2.2], [1.1, 1.2]])
	    y = np.array([1, 1, 2, 2])

	    eclf = VotingClassifier(
	        estimators=[("clf1", clf1), ("clf2", clf2), ("clf3", clf3)],
	        voting="soft",
	        weights=weights,
	    )

	    # Predict class probabilities with every classifier; the ensemble is last.
	    probas = [c.fit(X, y).predict_proba(X) for c in (clf1, clf2, clf3, eclf)]

	    # Class probabilities for the first sample in the dataset.
	    class1_1 = [pr[0, 0] for pr in probas]
	    class2_1 = [pr[0, 1] for pr in probas]

	    ind = np.arange(len(probas))  # group positions, one group per estimator
	    width = 0.35  # bar width

	    fig, ax = plt.subplots()

	    # Bars for classifiers 1-3; the ensemble slot is zero so that it can be
	    # drawn separately in its own colours.
	    p1 = ax.bar(ind, class1_1[:-1] + [0], width, color="green", edgecolor="k")
	    p2 = ax.bar(
	        ind + width,
	        class2_1[:-1] + [0],
	        width,
	        color="lightgreen",
	        edgecolor="k",
	    )

	    # Bars for the VotingClassifier.
	    ax.bar(ind, [0, 0, 0, class1_1[-1]], width, color="blue", edgecolor="k")
	    ax.bar(
	        ind + width, [0, 0, 0, class2_1[-1]], width, color="steelblue", edgecolor="k"
	    )

	    # Annotations. Everything goes through ``ax``/``fig`` rather than pyplot's
	    # implicit "current figure", so the drawing never depends on global state.
	    ax.axvline(2.8, color="k", linestyle="dashed")
	    ax.set_xticks(ind + width)
	    ax.set_xticklabels(
	        [
	            f"{model_1}\nweight {model_1_weight}",
	            f"{model_2}\nweight {model_2_weight}",
	            f"{model_3}\nweight {model_3_weight}",
	            "VotingClassifier\n(average probabilities)",
	        ],
	        rotation=40,
	        ha="right",
	    )
	    ax.set_ylim([0, 1])
	    ax.set_title("Class probabilities for sample 1 by different classifiers")
	    ax.legend([p1[0], p2[0]], ["class 1", "class 2"], loc="upper left")
	    fig.tight_layout()

	    # No plt.show(): the figure is handed back to the caller for rendering.
	    # pyplot keeps a reference to every figure it creates, so release it here;
	    # otherwise each callback invocation would leave another figure open.
	    plt.close(fig)
	    return fig


	# Model names offered by every dropdown.
	MODEL_CHOICES = ["Logistic Regression", "Random Forest", "Gaussian Naive Bayes"]

	# NOTE(review): the nesting of rows/columns below was reconstructed from a
	# source whose indentation had been flattened - confirm against the live app.
	with gr.Blocks() as demo:
	    gr.Markdown(
	        """
	# Class probabilities by the `VotingClassifier`

	This space shows the effect of the weight of different classifiers when using sklearn's [VotingClassifier](https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.VotingClassifier.html#sklearn.ensemble.VotingClassifier).

	For example, suppose you set the weights as in the table below, and the models have the following predicted probabilities:

	| | Weights | Predicted Probabilities |
	|---------|:-------:|:----------------:|
	| Model 1 | 1 | 0.5 |
	| Model 2 | 2 | 0.8 |
	| Model 3 | 5 | 0.9 |

	The predicted probability by the `VotingClassifier` will be $(1*0.5 + 2*0.8 + 5*0.9) / (1 + 2 + 5)$

	You can experiment with different model types and weights and see their effect on the VotingClassifier's prediction.

	This space is based on [sklearn’s original demo](https://scikit-learn.org/stable/auto_examples/ensemble/plot_voting_probas.html#sphx-glr-auto-examples-ensemble-plot-voting-probas-py).
	"""
	    )
	    with gr.Row():
	        # Left column: one row per model holding its dropdown and weight slider.
	        with gr.Column(scale=3):
	            with gr.Row():
	                model_1 = gr.Dropdown(
	                    MODEL_CHOICES,
	                    label="Model 1",
	                    value="Logistic Regression",
	                )
	                model_1_weight = gr.Slider(
	                    value=1, label="Model 1 Weight", max=10, step=1
	                )
	            with gr.Row():
	                model_2 = gr.Dropdown(
	                    MODEL_CHOICES,
	                    label="Model 2",
	                    value="Random Forest",
	                )
	                model_2_weight = gr.Slider(
	                    value=1, label="Model 2 Weight", max=10, step=1
	                )
	            with gr.Row():
	                model_3 = gr.Dropdown(
	                    MODEL_CHOICES,
	                    label="Model 3",
	                    value="Gaussian Naive Bayes",
	                )
	                model_3_weight = gr.Slider(
	                    value=5, label="Model 3 Weight", max=10, step=1
	                )
	        # Right column: the resulting probability plot.
	        with gr.Column(scale=4):
	            proba_plots = gr.Plot()

	    # Every control feeds the same callback with the same argument list, in
	    # the positional order expected by get_proba_plots.
	    plot_inputs = [
	        model_1,
	        model_2,
	        model_3,
	        model_1_weight,
	        model_2_weight,
	        model_3_weight,
	    ]

	    # Redraw the plot whenever any dropdown or slider changes.
	    for control in plot_inputs:
	        control.change(get_proba_plots, plot_inputs, proba_plots, queue=False)

	    # Draw the plot once with the default selections when the page loads.
	    demo.load(get_proba_plots, plot_inputs, proba_plots, queue=False)

	if __name__ == "__main__":
	    # Start the Gradio app only when this file is run as a script.
	    demo.launch()