import gradio as gr
import matplotlib.pyplot as plt
import numpy as np
from sklearn import datasets
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler

rng = np.random.default_rng(0)

X, y = datasets.load_digits(return_X_y=True)
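# Standardizing the pixel features puts them on a common scale, so the
# penalty shrinks all coefficients comparably.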
X = StandardScaler().fit_transform(X)

# classify small against large digits
y = (y > 4).astype(int)

# l1_ratio (the L1 weight in the Elastic-Net regularization) is set
# interactively by the slider in the UI below.
md_description = """
# L1 Penalty and Sparsity in Logistic Regression

Comparison of the sparsity (percentage of zero coefficients) of solutions when L1, L2 and Elastic-Net penalties are used for different values of C. We can see that large values of C give more freedom to the model. Conversely, smaller values of C constrain the model more. In the L1 penalty case, this leads to sparser solutions. As expected, the Elastic-Net penalty's sparsity is between that of L1 and L2.

We classify 8x8 images of digits into two classes: 0-4 against 5-9. The visualization shows coefficients of the models for varying C.
"""
def make_regression(l1_ratio):
    fig, axes = plt.subplots(3, 3)
    log_out = ""

    # Set regularization parameter
    for i, (C, axes_row) in enumerate(zip((1, 0.1, 0.01), axes)):
        # Increase tolerance for short training time
        clf_l1_LR = LogisticRegression(C=C, penalty="l1", tol=0.01, solver="saga")
        clf_l2_LR = LogisticRegression(C=C, penalty="l2", tol=0.01, solver="saga")
        clf_en_LR = LogisticRegression(
            C=C, penalty="elasticnet", solver="saga", l1_ratio=l1_ratio, tol=0.01
        )
        clf_l1_LR.fit(X, y)
        clf_l2_LR.fit(X, y)
        clf_en_LR.fit(X, y)

        coef_l1_LR = clf_l1_LR.coef_.ravel()
        coef_l2_LR = clf_l2_LR.coef_.ravel()
        coef_en_LR = clf_en_LR.coef_.ravel()

        # coef_l1_LR contains zeros due to the
        # L1 sparsity inducing norm
        sparsity_l1_LR = np.mean(coef_l1_LR == 0) * 100
        sparsity_l2_LR = np.mean(coef_l2_LR == 0) * 100
        sparsity_en_LR = np.mean(coef_en_LR == 0) * 100

        print(f"C={C:.2f}")
        print(f"{'Sparsity with L1 penalty:':<40} {sparsity_l1_LR:.2f}%")
        print(f"{'Sparsity with Elastic-Net penalty:':<40} {sparsity_en_LR:.2f}%")
        print(f"{'Sparsity with L2 penalty:':<40} {sparsity_l2_LR:.2f}%")
        print(f"{'Score with L1 penalty:':<40} {clf_l1_LR.score(X, y):.2f}")
        print(f"{'Score with Elastic-Net penalty:':<40} {clf_en_LR.score(X, y):.2f}")
        print(f"{'Score with L2 penalty:':<40} {clf_l2_LR.score(X, y):.2f}")

        # Accumulate the log so the results for all three values of C are shown
        log_out += f"""
C={C:.2f}
{'Sparsity with L1 penalty:':<40} {sparsity_l1_LR:.2f}%
{'Sparsity with Elastic-Net penalty:':<40} {sparsity_en_LR:.2f}%
{'Sparsity with L2 penalty:':<40} {sparsity_l2_LR:.2f}%
{'Score with L1 penalty:':<40} {clf_l1_LR.score(X, y):.2f}
{'Score with Elastic-Net penalty:':<40} {clf_en_LR.score(X, y):.2f}
{'Score with L2 penalty:':<40} {clf_l2_LR.score(X, y):.2f}
"""

        if i == 0:
            axes_row[0].set_title("L1 penalty")
            axes_row[1].set_title(f"Elastic-Net\nl1/l2_ratio = {l1_ratio}")
            axes_row[2].set_title("L2 penalty")

        for ax, coefs in zip(axes_row, [coef_l1_LR, coef_en_LR, coef_l2_LR]):
            ax.imshow(
                np.abs(coefs.reshape(8, 8)),
                interpolation="nearest",
                cmap="binary",
                vmax=1,
                vmin=0,
            )
            ax.set_xticks(())
            ax.set_yticks(())
        axes_row[0].set_ylabel(f"{C=}")

    return fig, log_out, make_example(l1_ratio)
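# make_example returns a copy-pasteable, standalone version of the code
# above, with the current slider value substituted for l1_ratio.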
def make_example(l1_ratio):
    return f"""
With the following code you can reproduce this example in a notebook, using the current slider value and the same data:
```python
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression
from sklearn import datasets
from sklearn.preprocessing import StandardScaler

rng = np.random.default_rng(0)

X, y = datasets.load_digits(return_X_y=True)
X = StandardScaler().fit_transform(X)

# classify small against large digits
y = (y > 4).astype(int)

l1_ratio = {l1_ratio}  # L1 weight in the Elastic-Net regularization

fig, axes = plt.subplots(3, 3)

# Set regularization parameter
for i, (C, axes_row) in enumerate(zip((1, 0.1, 0.01), axes)):
    # Increase tolerance for short training time
    clf_l1_LR = LogisticRegression(C=C, penalty="l1", tol=0.01, solver="saga")
    clf_l2_LR = LogisticRegression(C=C, penalty="l2", tol=0.01, solver="saga")
    clf_en_LR = LogisticRegression(
        C=C, penalty="elasticnet", solver="saga", l1_ratio=l1_ratio, tol=0.01
    )
    clf_l1_LR.fit(X, y)
    clf_l2_LR.fit(X, y)
    clf_en_LR.fit(X, y)

    coef_l1_LR = clf_l1_LR.coef_.ravel()
    coef_l2_LR = clf_l2_LR.coef_.ravel()
    coef_en_LR = clf_en_LR.coef_.ravel()

    # coef_l1_LR contains zeros due to the
    # L1 sparsity inducing norm
    sparsity_l1_LR = np.mean(coef_l1_LR == 0) * 100
    sparsity_l2_LR = np.mean(coef_l2_LR == 0) * 100
    sparsity_en_LR = np.mean(coef_en_LR == 0) * 100

    print(f"C={{C:.2f}}")
    print(f"{{'Sparsity with L1 penalty:':<40}} {{sparsity_l1_LR:.2f}}%")
    print(f"{{'Sparsity with Elastic-Net penalty:':<40}} {{sparsity_en_LR:.2f}}%")
    print(f"{{'Sparsity with L2 penalty:':<40}} {{sparsity_l2_LR:.2f}}%")
    print(f"{{'Score with L1 penalty:':<40}} {{clf_l1_LR.score(X, y):.2f}}")
    print(f"{{'Score with Elastic-Net penalty:':<40}} {{clf_en_LR.score(X, y):.2f}}")
    print(f"{{'Score with L2 penalty:':<40}} {{clf_l2_LR.score(X, y):.2f}}")

    if i == 0:
        axes_row[0].set_title("L1 penalty")
        axes_row[1].set_title(f"Elastic-Net\\nl1/l2_ratio = {{l1_ratio}}")
        axes_row[2].set_title("L2 penalty")

    for ax, coefs in zip(axes_row, [coef_l1_LR, coef_en_LR, coef_l2_LR]):
        ax.imshow(
            np.abs(coefs.reshape(8, 8)),
            interpolation="nearest",
            cmap="binary",
            vmax=1,
            vmin=0,
        )
        ax.set_xticks(())
        ax.set_yticks(())
    axes_row[0].set_ylabel(f"{{C=}}")

plt.show()
```
"""
with gr.Blocks() as demo:
    with gr.Row():
        gr.Markdown(md_description)
    with gr.Row():
        with gr.Column():
            ratio_slider = gr.Slider(
                minimum=0, maximum=1, label="L1/L2 ratio", step=0.1, value=0.5
            )
            button = gr.Button(value="Generate")
        with gr.Column():
            plot = gr.Plot(label="Output")
            log = gr.Markdown("")
    with gr.Row():
        example = gr.Markdown(make_example(ratio_slider.value))

    button.click(make_regression, inputs=[ratio_slider], outputs=[plot, log, example])
    ratio_slider.change(fn=make_regression, inputs=[ratio_slider], outputs=[plot, log, example])

demo.launch()