devinschumacher cmpatino committed on
Commit
f96cf95
0 Parent(s):

Duplicate from sklearn-docs/mean-shift-clustering

Browse files

Co-authored-by: Carlos Miguel Patiño <cmpatino@users.noreply.huggingface.co>

Files changed (5) hide show
  1. .gitattributes +34 -0
  2. .gitignore +125 -0
  3. README.md +13 -0
  4. app.py +121 -0
  5. requirements.txt +2 -0
.gitattributes ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tflite filter=lfs diff=lfs merge=lfs -text
29
+ *.tgz filter=lfs diff=lfs merge=lfs -text
30
+ *.wasm filter=lfs diff=lfs merge=lfs -text
31
+ *.xz filter=lfs diff=lfs merge=lfs -text
32
+ *.zip filter=lfs diff=lfs merge=lfs -text
33
+ *.zst filter=lfs diff=lfs merge=lfs -text
34
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,125 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ .coverage
3
+ .coverage.*
4
+ .cache
5
+ nosetests.xml
6
+ coverage.xml
7
+ *.cover
8
+ *.py,cover
9
+ .hypothesis/
10
+ .pytest_cache/
11
+ cover/
12
+
13
+ # Translations
14
+ *.mo
15
+ *.pot
16
+
17
+ # Django stuff:
18
+ *.log
19
+ local_settings.py
20
+ db.sqlite3
21
+ db.sqlite3-journal
22
+
23
+ # Flask stuff:
24
+ instance/
25
+ .webassets-cache
26
+
27
+ # Scrapy stuff:
28
+ .scrapy
29
+
30
+ # Sphinx documentation
31
+ docs/_build/
32
+
33
+ # PyBuilder
34
+ .pybuilder/
35
+ target/
36
+
37
+ # Jupyter Notebook
38
+ .ipynb_checkpoints
39
+
40
+ # IPython
41
+ profile_default/
42
+ ipython_config.py
43
+
44
+ # pyenv
45
+ # For a library or package, you might want to ignore these files since the code is
46
+ # intended to run in multiple environments; otherwise, check them in:
47
+ # .python-version
48
+
49
+ # pipenv
50
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
51
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
52
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
53
+ # install all needed dependencies.
54
+ #Pipfile.lock
55
+
56
+ # poetry
57
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
58
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
59
+ # commonly ignored for libraries.
60
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
61
+ #poetry.lock
62
+
63
+ # pdm
64
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
65
+ #pdm.lock
66
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
67
+ # in version control.
68
+ # https://pdm.fming.dev/#use-with-ide
69
+ .pdm.toml
70
+
71
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
72
+ __pypackages__/
73
+
74
+ # Celery stuff
75
+ celerybeat-schedule
76
+ celerybeat.pid
77
+
78
+ # SageMath parsed files
79
+ *.sage.py
80
+
81
+ # Environments
82
+ .env
83
+ .venv
84
+ env/
85
+ venv/
86
+ ENV/
87
+ env.bak/
88
+ venv.bak/
89
+
90
+ # Spyder project settings
91
+ .spyderproject
92
+ .spyproject
93
+
94
+ # Rope project settings
95
+ .ropeproject
96
+
97
+ # mkdocs documentation
98
+ /site
99
+
100
+ # mypy
101
+ .mypy_cache/
102
+ .dmypy.json
103
+ dmypy.json
104
+
105
+ # Pyre type checker
106
+ .pyre/
107
+
108
+ # pytype static type analyzer
109
+ .pytype/
110
+
111
+ # Cython debug symbols
112
+ cython_debug/
113
+
114
+ # PyCharm
115
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
116
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
117
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
118
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
119
+ #.idea/
120
+
121
+ # VS Code
122
+ .vscode/
123
+
124
+ # pycache
125
+ __pycache__/
README.md ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Mean Shift Clustering
3
+ emoji: 💩
4
+ colorFrom: blue
5
+ colorTo: green
6
+ sdk: gradio
7
+ sdk_version: 3.24.1
8
+ app_file: app.py
9
+ pinned: false
10
+ duplicated_from: sklearn-docs/mean-shift-clustering
11
+ ---
12
+
13
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,121 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import matplotlib.pyplot as plt
3
+ import numpy as np
4
+ from sklearn.cluster import MeanShift, estimate_bandwidth
5
+ from sklearn.datasets import make_blobs
6
+
7
+
8
def get_clusters_plot(n_blobs, quantile, cluster_std):
    """Cluster synthetic blobs with Mean Shift and plot the outcome.

    Parameters
    ----------
    n_blobs : int
        Number of blob centers to generate, i.e. the "true" cluster count.
    quantile : float
        Quantile forwarded to ``estimate_bandwidth`` to derive the bandwidth.
    cluster_std : float
        Standard deviation of every generated blob.

    Returns
    -------
    fig : matplotlib.figure.Figure
        Scatter plot of the points of each estimated cluster, with the
        estimated cluster centers drawn as "x" markers.
    message : str
        HTML paragraph telling whether the estimated number of clusters
        matches the number of generated blobs.

    Raises
    ------
    gr.Error
        If the estimated bandwidth is not strictly positive (e.g. when
        ``quantile`` is 0), since ``MeanShift`` cannot run with it.
    """
    # Imported locally so the figure can be built without pyplot. pyplot keeps
    # every figure it creates in a global registry (leaking one per callback)
    # and tracks a single "current figure", which concurrent callbacks would
    # share.
    from matplotlib.figure import Figure

    # NOTE(review): sliders presumably deliver ints when step=1; coerce anyway
    # so a float such as 3.0 is not mistaken for an array of centers.
    n_blobs = int(n_blobs)

    X, _, centers = make_blobs(
        n_samples=10000, cluster_std=cluster_std, centers=n_blobs, return_centers=True
    )

    bandwidth = estimate_bandwidth(X, quantile=quantile, n_samples=500)
    if bandwidth <= 0:
        # Surface a readable error in the UI instead of an opaque
        # parameter-validation failure from MeanShift.
        raise gr.Error(
            "The estimated bandwidth is zero for this `Quantile` value;"
            " please choose a larger one."
        )

    ms = MeanShift(bandwidth=bandwidth, bin_seeding=True)
    ms.fit(X)
    labels = ms.labels_
    cluster_centers = ms.cluster_centers_

    labels_unique = np.unique(labels)
    n_clusters_ = len(labels_unique)

    fig = Figure()
    ax = fig.subplots()

    # Iterate over the labels that actually occur so that every plotted
    # center is paired with its own members.
    for k in labels_unique:
        my_members = labels == k
        cluster_center = cluster_centers[k]
        ax.scatter(X[my_members, 0], X[my_members, 1])
        ax.plot(
            cluster_center[0],
            cluster_center[1],
            "x",
            markeredgecolor="k",
            markersize=14,
        )

    ax.set_xlabel("Feature 1")
    ax.set_ylabel("Feature 2")
    ax.set_title(f"Estimated number of clusters: {n_clusters_}")

    if len(centers) != n_clusters_:
        message = (
            '<p style="text-align: center;">'
            f"The number of estimated clusters ({n_clusters_})"
            f" differs from the true number of clusters ({n_blobs})."
            " Try changing the `Quantile` parameter.</p>"
        )
    else:
        message = (
            '<p style="text-align: center;">'
            f"The number of estimated clusters ({n_clusters_})"
            f" matches the true number of clusters ({n_blobs})!</p>"
        )
    return fig, message
55
+
56
+
57
# Build the demo UI: three sliders on the left drive a plot and a status
# message on the right.
with gr.Blocks() as demo:
    gr.Markdown(
        """
    # Mean Shift Clustering

    This space shows how to use the [Mean Shift Clustering](https://scikit-learn.org/stable/modules/generated/sklearn.cluster.MeanShift.html) algorithm to cluster 2D data points. You can change the parameters using the sliders and see how the model performs.

    This space is based on [sklearn's original demo](https://scikit-learn.org/stable/auto_examples/cluster/plot_mean_shift.html#sphx-glr-auto-examples-cluster-plot-mean-shift-py).
    """
    )
    with gr.Row():
        with gr.Column(scale=1):
            n_blobs = gr.Slider(
                minimum=2,
                maximum=10,
                label="Number of clusters in the data",
                step=1,
                value=3,
            )
            quantile = gr.Slider(
                # A quantile of 0 makes the estimated bandwidth collapse to 0,
                # which MeanShift rejects, so start at the first usable step.
                minimum=0.05,
                maximum=1,
                step=0.05,
                value=0.2,
                label="Quantile",
                info="Used to determine clustering's bandwidth.",
            )
            cluster_std = gr.Slider(
                minimum=0.1,
                maximum=1,
                label="Clusters' standard deviation",
                step=0.1,
                value=0.6,
            )
        with gr.Column(scale=4):
            clusters_plots = gr.Plot(label="Clusters' Plot")
            message = gr.HTML()

    plot_inputs = [n_blobs, quantile, cluster_std]
    plot_outputs = [clusters_plots, message]

    # Every slider triggers the same callback with the same inputs/outputs.
    for slider in plot_inputs:
        slider.change(
            get_clusters_plot,
            plot_inputs,
            plot_outputs,
            queue=False,
        )

    # Render an initial plot as soon as the page is loaded.
    demo.load(
        get_clusters_plot,
        plot_inputs,
        plot_outputs,
        queue=False,
    )

# Start the Gradio server only when the file is executed as a script.
if __name__ == "__main__":
    demo.launch()
requirements.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ scikit-learn==1.2.2
2
+ matplotlib==3.7.1