Spaces:
Sleeping
Sleeping
Commit
·
fcc4124
1
Parent(s):
3cab2dd
leaving off here, working on kmeans algorithm
Browse files- cluster/kmeans.py +32 -22
cluster/kmeans.py
CHANGED
@@ -12,29 +12,39 @@ class Kmeans(Clusterer):
|
|
12 |
|
13 |
def build(
|
14 |
self,
|
15 |
-
|
16 |
):
|
17 |
-
#
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
38 |
|
39 |
def label():
|
40 |
...
|
|
|
12 |
|
13 |
def build(
    self,
    X: np.ndarray,
    max_iter: int = 100,
):
    """Fit k-means centroids to ``X`` using Lloyd's algorithm.

    Centroids are seeded from ``self.k`` distinct rows of ``X`` and then
    refined by alternating an assignment step and a centroid-update step
    until the assignments stop changing or ``max_iter`` rounds have run.

    Args:
        X: Data matrix, one sample per row.
        max_iter: Upper bound on assignment/update rounds (at least 1).

    Raises:
        ValueError: If ``max_iter`` is smaller than 1, or (from
            ``np.random.choice``) if ``self.k`` exceeds the number of rows.
    """
    if max_iter < 1:
        raise ValueError("max_iter must be at least 1")

    # randomly initialize centroids (sampling without replacement so the
    # starting centroids are distinct rows of X)
    centroids = X[np.random.choice(
        X.shape[0],
        self.k,
        replace=False,
    )]

    clusters = None
    for _ in range(max_iter):
        # Assign each point to its closest centroid.
        assigned = self.assign_clusters(X, centroids)
        # Unchanged assignments mean the centroids computed from them in
        # the previous round are already final.
        if clusters is not None and np.array_equal(assigned, clusters):
            break
        clusters = assigned
        centroids = self.update_centroids(self.k, X, clusters)

    # Keep the fitted state; previously both results were discarded.
    # NOTE(review): attribute names are a choice made here - confirm they
    # match what the Clusterer base class / label() expect.
    self.centroids = centroids
    self.clusters = clusters
|
28 |
+
|
29 |
+
@staticmethod
def assign_clusters(
    X: np.ndarray,
    centroids: np.ndarray,
) -> np.ndarray:
    """Return, for every row of ``X``, the index of its nearest centroid.

    Args:
        X: Data matrix, one sample per row.
        centroids: Centroid matrix, one centroid per row, with the same
            number of columns as ``X``.

    Returns:
        A 1-D integer array with one centroid index per row of ``X``.
        Ties go to the lowest centroid index (``np.argmin`` semantics).
    """
    # Adding an axis to the centroids broadcasts them against every row of
    # X, giving one difference vector per (centroid, sample) pair.
    diffs = X - centroids[:, np.newaxis]
    # Squared Euclidean distance is enough: the square root is monotonic,
    # so it cannot change which centroid is the closest.
    sq_distances = (diffs ** 2).sum(axis=2)
    return np.argmin(sq_distances, axis=0)
|
37 |
+
|
38 |
+
@staticmethod
def update_centroids(
    k: int,
    X: np.ndarray,
    clusters: np.ndarray,
    fallback: "np.ndarray | None" = None,
) -> np.ndarray:
    """Recompute each centroid as the mean of the points assigned to it.

    Args:
        k: Number of clusters.
        X: Data matrix, one sample per row.
        clusters: Cluster index for every row of ``X``.
        fallback: Optional ``k``-row matrix of centroids to keep for
            clusters that currently have no members (typically the
            centroids from the previous iteration).

    Returns:
        A ``(k, X.shape[1])`` float array of centroids.

    Raises:
        ValueError: If ``X`` has no rows.
    """
    if X.shape[0] == 0:
        raise ValueError("cannot update centroids from an empty data set")

    centroids = np.zeros((k, X.shape[1]))
    for i in range(k):
        members = X[clusters == i]
        if members.shape[0]:
            centroids[i] = members.mean(axis=0)
        elif fallback is not None:
            # Empty cluster: keep the centroid the caller handed in.
            centroids[i] = fallback[i]
        else:
            # Empty cluster and nothing to fall back on: reseed from a
            # random data row instead of producing a NaN centroid (the mean
            # of an empty slice).
            centroids[i] = X[np.random.randint(X.shape[0])]
    return centroids
|
48 |
|
49 |
def label():
|
50 |
...
|