Spaces:

nielsgl
/

sklearn-lm-l1-l2-sparsity

Runtime error

App Files Files Community

nielsgl committed on Apr 4, 2023

Commit

a255cdf

•

1 Parent(s): a2cb80c

update project

Browse files

Files changed (7) hide show

.pre-commit-config.yaml +34 -0
.python-version +1 -0
app.py +186 -0
poetry.lock +0 -0
poetry.toml +2 -0
pyproject.toml +53 -0
requirements.txt +74 -0

.pre-commit-config.yaml ADDED Viewed

	@@ -0,0 +1,34 @@

+# See https://pre-commit.com for more information
+# See https://pre-commit.com/hooks.html for more hooks
+repos:
+  # Generic file hygiene hooks
+  - repo: https://github.com/pre-commit/pre-commit-hooks
+    rev: v4.4.0
+    hooks:
+      - id: trailing-whitespace
+      - id: end-of-file-fixer
+      - id: check-yaml
+      # - id: check-added-large-files
+  # Code formatting; the black-jupyter hook is enabled in place of the plain black hook
+  - repo: https://github.com/psf/black
+    rev: 23.3.0
+    hooks:
+      # - id: black
+      - id: black-jupyter
+  # Import sorting
+  - repo: https://github.com/pycqa/isort
+    rev: 5.12.0
+    hooks:
+      - id: isort
+        name: isort (python)
+  # Syntax modernisation
+  # NOTE(review): --py311-plus targets Python 3.11+, while pyproject.toml declares
+  # python = ">=3.8.9,<3.12" -- confirm the intended minimum version.
+  - repo: https://github.com/asottile/pyupgrade
+    rev: v3.3.1
+    hooks:
+      - id: pyupgrade
+        args: [--py311-plus]
+  # The same isort / black / pyupgrade checks applied through nbQA
+  - repo: https://github.com/nbQA-dev/nbQA
+    rev: 1.7.0
+    hooks:
+      - id: nbqa-isort
+      - id: nbqa-black
+      - id: nbqa-pyupgrade
+        args: [--py311-plus]
+# Interpreter used to run the hooks
+default_language_version:
+  python: python3.11

.python-version ADDED Viewed

	@@ -0,0 +1 @@


1	+ 3.11.1

app.py ADDED Viewed

	@@ -0,0 +1,186 @@

+import gradio as gr
+import matplotlib.pyplot as plt
+import numpy as np
+from sklearn import datasets
+from sklearn.linear_model import LogisticRegression
+from sklearn.preprocessing import StandardScaler
+# Module-level data preparation: executed once when the module is loaded and shared by
+# every call to make_regression.
+rng = np.random.default_rng(0)  # NOTE(review): not referenced again in the visible code
+X, y = datasets.load_digits(return_X_y=True)
+# Standardize the features before fitting
+X = StandardScaler().fit_transform(X)
+# classify small against large digits
+y = (y > 4).astype(int)
+# The Elastic-Net L1 weight (l1_ratio) is not fixed here; make_regression receives it
+# as an argument.
+# Markdown text shown at the top of the app
+md_description = """
+# L1 Penalty and Sparsity in Logistic Regression
+Comparison of the sparsity (percentage of zero coefficients) of solutions when L1, L2 and Elastic-Net penalty are used for different values of C. We can see that large values of C give more freedom to the model. Conversely, smaller values of C constrain the model more. In the L1 penalty case, this leads to sparser solutions. As expected, the Elastic-Net penalty sparsity is between that of L1 and L2.
+We classify 8x8 images of digits into two classes: 0-4 against 5-9. The visualization shows coefficients of the models for varying C.
+"""
+def make_regression(l1_ratio):
+    """Fit L1, Elastic-Net and L2 logistic regressions for three values of C and plot them.
+
+    For each C in (1, 0.1, 0.01) the three models are fitted on the module-level
+    ``X``/``y`` data. Their absolute coefficients are drawn as 8x8 images in one row
+    of a 3x3 grid, and their sparsity (percentage of coefficients equal to zero) and
+    score on ``X``/``y`` are printed and collected into a report.
+
+    Args:
+        l1_ratio: L1 weight passed to the Elastic-Net model.
+
+    Returns:
+        A ``(fig, log_out, example)`` tuple: the matplotlib figure, the report for
+        all three values of C as a fenced Markdown code block, and the reproduction
+        snippet returned by ``make_example(l1_ratio)``.
+    """
+    # NOTE(review): every call creates a new pyplot figure and nothing here closes it.
+    fig, axes = plt.subplots(3, 3)
+    # One report section per value of C; previously only the last one was returned.
+    report = []
+    # Set regularization parameter
+    for i, (C, axes_row) in enumerate(zip((1, 0.1, 0.01), axes)):
+        # Increase tolerance for short training time.
+        # Insertion order (L1, Elastic-Net, L2) is also the column order of the plot.
+        models = {
+            "L1": LogisticRegression(C=C, penalty="l1", tol=0.01, solver="saga"),
+            "Elastic-Net": LogisticRegression(
+                C=C, penalty="elasticnet", solver="saga", l1_ratio=l1_ratio, tol=0.01
+            ),
+            "L2": LogisticRegression(C=C, penalty="l2", tol=0.01, solver="saga"),
+        }
+        coefs, sparsity, scores = {}, {}, {}
+        for name, clf in models.items():
+            clf.fit(X, y)
+            coefs[name] = clf.coef_.ravel()
+            # Percentage of coefficients that are exactly zero
+            sparsity[name] = np.mean(coefs[name] == 0) * 100
+            # Score each model once and reuse the value for stdout and the report
+            scores[name] = clf.score(X, y)
+
+        lines = [f"C={C:.2f}"]
+        for name in models:
+            label = f"Sparsity with {name} penalty:"
+            # ".2f" (two decimals), not "2f" (width 2 with the default six decimals)
+            lines.append(f"{label:<40} {sparsity[name]:.2f}%")
+        for name in models:
+            label = f"Score with {name} penalty:"
+            lines.append(f"{label:<40} {scores[name]:.2f}")
+        section = "\n".join(lines)
+        print(section)
+        report.append(section)
+
+        if i == 0:
+            axes_row[0].set_title("L1 penalty")
+            axes_row[1].set_title(f"Elastic-Net\nl1/l2_ratio = {l1_ratio}")
+            axes_row[2].set_title("L2 penalty")
+        for ax, coef in zip(axes_row, coefs.values()):
+            ax.imshow(
+                np.abs(coef.reshape(8, 8)),
+                interpolation="nearest",
+                cmap="binary",
+                vmax=1,
+                vmin=0,
+            )
+            ax.set_xticks(())
+            ax.set_yticks(())
+        axes_row[0].set_ylabel(f"{C=}")
+
+    # Fence the report so the padded label columns keep their alignment in Markdown.
+    log_out = "```\n" + "\n\n".join(report) + "\n```"
+    return fig, log_out, make_example(l1_ratio)
+def make_example(l1_ratio):
+    """Return a Markdown snippet with notebook code that reproduces the current plot.
+
+    Args:
+        l1_ratio: L1 weight of the Elastic-Net model; written into the generated code
+            as the value of its ``l1_ratio`` variable.
+
+    Returns:
+        A Markdown string made of a short introduction and a fenced Python code block.
+    """
+    # Doubled braces are literal braces in the generated code; only the single-braced
+    # {l1_ratio} on the assignment line is substituted here.
+    return f"""
+    With the following code you can reproduce this example with the current values of the sliders and the same data in a notebook:
+    ```python
+    import numpy as np
+    import matplotlib.pyplot as plt
+    from sklearn.linear_model import LogisticRegression
+    from sklearn import datasets
+    from sklearn.preprocessing import StandardScaler
+    rng = np.random.default_rng(0)
+    X, y = datasets.load_digits(return_X_y=True)
+    X = StandardScaler().fit_transform(X)
+    # classify small against large digits
+    y = (y > 4).astype(int)
+    l1_ratio = {l1_ratio}  # L1 weight in the Elastic-Net regularization
+    fig, axes = plt.subplots(3, 3)
+    # Set regularization parameter
+    for i, (C, axes_row) in enumerate(zip((1, 0.1, 0.01), axes)):
+        # Increase tolerance for short training time
+        clf_l1_LR = LogisticRegression(C=C, penalty="l1", tol=0.01, solver="saga")
+        clf_l2_LR = LogisticRegression(C=C, penalty="l2", tol=0.01, solver="saga")
+        clf_en_LR = LogisticRegression(
+            C=C, penalty="elasticnet", solver="saga", l1_ratio=l1_ratio, tol=0.01
+        )
+        clf_l1_LR.fit(X, y)
+        clf_l2_LR.fit(X, y)
+        clf_en_LR.fit(X, y)
+        coef_l1_LR = clf_l1_LR.coef_.ravel()
+        coef_l2_LR = clf_l2_LR.coef_.ravel()
+        coef_en_LR = clf_en_LR.coef_.ravel()
+        # coef_l1_LR contains zeros due to the
+        # L1 sparsity inducing norm
+        sparsity_l1_LR = np.mean(coef_l1_LR == 0) * 100
+        sparsity_l2_LR = np.mean(coef_l2_LR == 0) * 100
+        sparsity_en_LR = np.mean(coef_en_LR == 0) * 100
+        print(f"C={{C:.2f}}")
+        print(f"{{'Sparsity with L1 penalty:':<40}} {{sparsity_l1_LR:.2f}}%")
+        print(f"{{'Sparsity with Elastic-Net penalty:':<40}} {{sparsity_en_LR:.2f}}%")
+        print(f"{{'Sparsity with L2 penalty:':<40}} {{sparsity_l2_LR:.2f}}%")
+        print(f"{{'Score with L1 penalty:':<40}} {{clf_l1_LR.score(X, y):.2f}}")
+        print(f"{{'Score with Elastic-Net penalty:':<40}} {{clf_en_LR.score(X, y):.2f}}")
+        print(f"{{'Score with L2 penalty:':<40}} {{clf_l2_LR.score(X, y):.2f}}")
+        if i == 0:
+            axes_row[0].set_title("L1 penalty")
+            axes_row[1].set_title(f"Elastic-Net\\nl1/l2_ratio = {{l1_ratio}}")
+            axes_row[2].set_title("L2 penalty")
+        for ax, coefs in zip(axes_row, [coef_l1_LR, coef_en_LR, coef_l2_LR]):
+            ax.imshow(
+                np.abs(coefs.reshape(8, 8)),
+                interpolation="nearest",
+                cmap="binary",
+                vmax=1,
+                vmin=0,
+            )
+            ax.set_xticks(())
+            ax.set_yticks(())
+        axes_row[0].set_ylabel(f"{{C=}}")
+    plt.show()
+    ```
+    """
+# Build the Gradio interface: a description row, a row with the controls next to the
+# outputs, and a row with the reproduction snippet.
+with gr.Blocks() as demo:
+    with gr.Row():
+        gr.Markdown(md_description)
+    with gr.Row():
+        # Input controls
+        with gr.Column():
+            ratio_slider = gr.Slider(minimum=0, maximum=1, label="L1/L2 ratio", step=0.1, value=0.5)
+            button = gr.Button(value="Generate")
+        # Outputs: the coefficient plot and the text report (empty until the first run)
+        with gr.Column():
+            plot = gr.Plot(label="Output")
+            log = gr.Markdown("")
+    with gr.Row():
+        # Snippet is pre-filled using the slider's initial value
+        example = gr.Markdown(make_example(ratio_slider.value))
+        # Both the button click and a slider change call make_regression with the slider
+        # value and write its three results to the plot, the log and the example.
+        button.click(make_regression, inputs=[ratio_slider], outputs=[plot, log, example])
+        ratio_slider.change(fn=make_regression, inputs=[ratio_slider], outputs=[plot, log, example])
+# Launch is unconditional (no __main__ guard), so it also runs when the module is imported.
+demo.launch()

poetry.lock ADDED Viewed

The diff for this file is too large to render. See raw diff

poetry.toml ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ [virtualenvs]
2	+ in-project = true

pyproject.toml ADDED Viewed

	@@ -0,0 +1,53 @@

+# Poetry project metadata
+[tool.poetry]
+# NOTE(review): the name refers to decision-tree regression, while this Space is
+# sklearn-lm-l1-l2-sparsity -- possibly copied from another demo; confirm.
+name = "sklearn-decision-tree-regression"
+version = "0.1.0"
+description = "Hugging Face Scikit Learn Demos"
+authors = ["Niels van Galen Last <nvangalenlast@gmail.com>"]
+license = "MIT"
+readme = "README.md"
+# packages = [{ include = "huggingface_sklearn" }]
+# Runtime dependencies
+[tool.poetry.dependencies]
+python = ">=3.8.9,<3.12"
+numpy = "^1.24.2"
+scikit-learn = "^1.2.2"
+matplotlib = "^3.7.1"
+plotly = "^5.14.0"
+gradio = "^3.24.1"
+# Development-only tooling
+[tool.poetry.group.dev.dependencies]
+black = { extras = ["jupyter"], version = "^23.3.0" }
+isort = "^5.12.0"
+pre-commit = "^3.2.1"
+pylint = "^2.17.1"
+pytest = "^7.2.2"
+jupyterlab = "^3.6.3"
+jupyterlab-widgets = "^3.0.7"
+ipywidgets = "^8.0.6"
+[build-system]
+requires = ["poetry-core"]
+build-backend = "poetry.core.masonry.api"
+# Formatter settings; black and isort share the same 100-column limit
+[tool.black]
+line-length = 100
+target_version = ['py311']
+include = '\.py$'
+[tool.isort]
+profile = "black"
+# force_single_line = "false"
+force_sort_within_sections = "true"
+line_length = 100
+[tool.pylint]
+# Pylint checks switched off for this project
+[tool.pylint.messages_control]
+#line-too-long='off'
+disable = """
+    invalid-name,
+    logging-fstring-interpolation,
+    missing-class-docstring,
+    missing-function-docstring,
+    missing-module-docstring,
+    """

requirements.txt ADDED Viewed

	@@ -0,0 +1,74 @@

+aiofiles==22.1.0 ; python_full_version >= "3.8.9" and python_version < "3.12"
+aiohttp==3.8.4 ; python_full_version >= "3.8.9" and python_version < "3.12"
+aiosignal==1.3.1 ; python_full_version >= "3.8.9" and python_version < "3.12"
+altair==4.2.2 ; python_full_version >= "3.8.9" and python_version < "3.12"
+anyio==3.6.2 ; python_full_version >= "3.8.9" and python_version < "3.12"
+async-timeout==4.0.2 ; python_full_version >= "3.8.9" and python_version < "3.12"
+attrs==22.2.0 ; python_full_version >= "3.8.9" and python_version < "3.12"
+certifi==2022.12.7 ; python_full_version >= "3.8.9" and python_version < "3.12"
+charset-normalizer==3.1.0 ; python_full_version >= "3.8.9" and python_version < "3.12"
+click==8.1.3 ; python_full_version >= "3.8.9" and python_version < "3.12"
+colorama==0.4.6 ; python_full_version >= "3.8.9" and python_version < "3.12" and platform_system == "Windows"
+contourpy==1.0.7 ; python_full_version >= "3.8.9" and python_version < "3.12"
+cycler==0.11.0 ; python_full_version >= "3.8.9" and python_version < "3.12"
+entrypoints==0.4 ; python_full_version >= "3.8.9" and python_version < "3.12"
+fastapi==0.95.0 ; python_full_version >= "3.8.9" and python_version < "3.12"
+ffmpy==0.3.0 ; python_full_version >= "3.8.9" and python_version < "3.12"
+filelock==3.10.7 ; python_full_version >= "3.8.9" and python_version < "3.12"
+fonttools==4.39.3 ; python_full_version >= "3.8.9" and python_version < "3.12"
+frozenlist==1.3.3 ; python_full_version >= "3.8.9" and python_version < "3.12"
+fsspec==2023.3.0 ; python_full_version >= "3.8.9" and python_version < "3.12"
+gradio-client==0.0.5 ; python_full_version >= "3.8.9" and python_version < "3.12"
+gradio==3.24.1 ; python_full_version >= "3.8.9" and python_version < "3.12"
+h11==0.14.0 ; python_full_version >= "3.8.9" and python_version < "3.12"
+httpcore==0.16.3 ; python_full_version >= "3.8.9" and python_version < "3.12"
+httpx==0.23.3 ; python_full_version >= "3.8.9" and python_version < "3.12"
+huggingface-hub==0.13.3 ; python_full_version >= "3.8.9" and python_version < "3.12"
+idna==3.4 ; python_full_version >= "3.8.9" and python_version < "3.12"
+importlib-resources==5.12.0 ; python_full_version >= "3.8.9" and python_version < "3.10"
+jinja2==3.1.2 ; python_full_version >= "3.8.9" and python_version < "3.12"
+joblib==1.2.0 ; python_full_version >= "3.8.9" and python_version < "3.12"
+jsonschema==4.17.3 ; python_full_version >= "3.8.9" and python_version < "3.12"
+kiwisolver==1.4.4 ; python_full_version >= "3.8.9" and python_version < "3.12"
+linkify-it-py==2.0.0 ; python_full_version >= "3.8.9" and python_version < "3.12"
+markdown-it-py==2.2.0 ; python_full_version >= "3.8.9" and python_version < "3.12"
+markdown-it-py[linkify]==2.2.0 ; python_full_version >= "3.8.9" and python_version < "3.12"
+markupsafe==2.1.2 ; python_full_version >= "3.8.9" and python_version < "3.12"
+matplotlib==3.7.1 ; python_full_version >= "3.8.9" and python_version < "3.12"
+mdit-py-plugins==0.3.3 ; python_full_version >= "3.8.9" and python_version < "3.12"
+mdurl==0.1.2 ; python_full_version >= "3.8.9" and python_version < "3.12"
+multidict==6.0.4 ; python_full_version >= "3.8.9" and python_version < "3.12"
+numpy==1.24.2 ; python_full_version >= "3.8.9" and python_version < "3.12"
+orjson==3.8.9 ; python_full_version >= "3.8.9" and python_version < "3.12"
+packaging==23.0 ; python_full_version >= "3.8.9" and python_version < "3.12"
+pandas==1.5.3 ; python_full_version >= "3.8.9" and python_version < "3.12"
+pillow==9.5.0 ; python_full_version >= "3.8.9" and python_version < "3.12"
+pkgutil-resolve-name==1.3.10 ; python_full_version >= "3.8.9" and python_version < "3.9"
+plotly==5.14.0 ; python_full_version >= "3.8.9" and python_version < "3.12"
+pydantic==1.10.7 ; python_full_version >= "3.8.9" and python_version < "3.12"
+pydub==0.25.1 ; python_full_version >= "3.8.9" and python_version < "3.12"
+pyparsing==3.0.9 ; python_full_version >= "3.8.9" and python_version < "3.12"
+pyrsistent==0.19.3 ; python_full_version >= "3.8.9" and python_version < "3.12"
+python-dateutil==2.8.2 ; python_full_version >= "3.8.9" and python_version < "3.12"
+python-multipart==0.0.6 ; python_full_version >= "3.8.9" and python_version < "3.12"
+pytz==2023.3 ; python_full_version >= "3.8.9" and python_version < "3.12"
+pyyaml==6.0 ; python_full_version >= "3.8.9" and python_version < "3.12"
+requests==2.28.2 ; python_full_version >= "3.8.9" and python_version < "3.12"
+rfc3986[idna2008]==1.5.0 ; python_full_version >= "3.8.9" and python_version < "3.12"
+scikit-learn==1.2.2 ; python_full_version >= "3.8.9" and python_version < "3.12"
+scipy==1.9.3 ; python_full_version >= "3.8.9" and python_version < "3.12"
+semantic-version==2.10.0 ; python_full_version >= "3.8.9" and python_version < "3.12"
+six==1.16.0 ; python_full_version >= "3.8.9" and python_version < "3.12"
+sniffio==1.3.0 ; python_full_version >= "3.8.9" and python_version < "3.12"
+starlette==0.26.1 ; python_full_version >= "3.8.9" and python_version < "3.12"
+tenacity==8.2.2 ; python_full_version >= "3.8.9" and python_version < "3.12"
+threadpoolctl==3.1.0 ; python_full_version >= "3.8.9" and python_version < "3.12"
+toolz==0.12.0 ; python_full_version >= "3.8.9" and python_version < "3.12"
+tqdm==4.65.0 ; python_full_version >= "3.8.9" and python_version < "3.12"
+typing-extensions==4.5.0 ; python_full_version >= "3.8.9" and python_version < "3.12"
+uc-micro-py==1.0.1 ; python_full_version >= "3.8.9" and python_version < "3.12"
+urllib3==1.26.15 ; python_full_version >= "3.8.9" and python_version < "3.12"
+uvicorn==0.21.1 ; python_full_version >= "3.8.9" and python_version < "3.12"
+websockets==11.0 ; python_full_version >= "3.8.9" and python_version < "3.12"
+yarl==1.8.2 ; python_full_version >= "3.8.9" and python_version < "3.12"
+zipp==3.15.0 ; python_full_version >= "3.8.9" and python_version < "3.10"