Jensen-holm committed on
Commit
c2687ce
·
1 Parent(s): 803aad3

v1 of kmeans may be complete

Browse files
cluster/distance.py CHANGED
@@ -1,5 +1,8 @@
1
  import numpy as np
2
 
 
 
 
3
 
4
  def euclidean(
5
  point: np.array,
 
1
  import numpy as np
2
 
3
+ # right now I am not using this function
4
+ # maybe get rid of it or change it to how we
5
+ # use it in our distance calculations
6
 
7
  def euclidean(
8
  point: np.array,
cluster/kmeans.py CHANGED
@@ -8,11 +8,13 @@ from cluster.clusterer import Clusterer
8
  class Kmeans(Clusterer):
9
  k: int
10
  max_iter: int
 
 
11
 
12
  def build(
13
  self,
14
  X: np.array,
15
- ) -> dict[str, np.array]:
16
  # randomly initialize centroids
17
  centroids = X[np.random.choice(
18
  X.shape[0],
@@ -31,10 +33,9 @@ class Kmeans(Clusterer):
31
  break
32
  clusters = new_clusts
33
  centroids = self.update_centroids(self.k, X, clusters)
34
- return {
35
- "clusters": clusters,
36
- "centroids": centroids,
37
- }
38
 
39
  @staticmethod
40
  def assign_clusters(
@@ -56,8 +57,10 @@ class Kmeans(Clusterer):
56
  centroids[i] = X[clusters == i].mean(axis=0)
57
  return centroids
58
 
59
- def label():
60
- ...
61
-
62
- def main(self):
63
- return self.from_dict()
 
 
 
8
  class Kmeans(Clusterer):
9
  k: int
10
  max_iter: int
11
+ centroids = None
12
+ clusters = None
13
 
14
  def build(
15
  self,
16
  X: np.array,
17
+ ) -> None:
18
  # randomly initialize centroids
19
  centroids = X[np.random.choice(
20
  X.shape[0],
 
33
  break
34
  clusters = new_clusts
35
  centroids = self.update_centroids(self.k, X, clusters)
36
+
37
+ self.clusters = clusters
38
+ self.centroids = centroids
 
39
 
40
  @staticmethod
41
  def assign_clusters(
 
57
  centroids[i] = X[clusters == i].mean(axis=0)
58
  return centroids
59
 
60
+ def to_dict(self) -> dict:
61
+ return {
62
+ "k": self.k,
63
+ "max_iter": self.max_iter,
64
+ "centroids": self.centroids,
65
+ "clusters": self.clusters,
66
+ }
cluster/kmedoids.py CHANGED
@@ -8,11 +8,8 @@ from cluster.clusterer import Clusterer
8
  class Kmedoids(Clusterer):
9
  k: int
10
 
11
- def build(self, X_train: np.array):
12
  ...
13
 
14
- def label():
15
- ...
16
-
17
- def main():
18
  ...
 
8
  class Kmedoids(Clusterer):
9
  k: int
10
 
11
+ def main(self, X):
12
  ...
13
 
14
+ def build(self, X: np.array):
 
 
 
15
  ...
cluster/main.py CHANGED
@@ -12,6 +12,8 @@ def main(
12
  args: dict,
13
  ) -> dict:
14
  cluster_alg: Clusterer = clustering_methods[args["algorithm"]]
15
- model = cluster_alg.main(X, args)
16
- model.eval(X, y)
17
- return model.to_dict()
 
 
 
12
  args: dict,
13
  ) -> dict:
14
  cluster_alg: Clusterer = clustering_methods[args["algorithm"]]
15
+
16
+ alg = cluster_alg.from_dict(args)
17
+ alg.build(X)
18
+
19
+ return alg.to_dict()