当前位置:   article > 正文

Kernel K-means

Kernel K-means
  1. import numpy as np
  2. from sklearn.base import BaseEstimator, ClusterMixin
  3. from sklearn.metrics.pairwise import pairwise_kernels
  4. from sklearn.utils import check_random_state
  5. class KernelKMeans(BaseEstimator, ClusterMixin):
  6. """
  7. Kernel K-means
  8. Reference
  9. ---------
  10. Kernel k-means, Spectral Clustering and Normalized Cuts.
  11. Inderjit S. Dhillon, Yuqiang Guan, Brian Kulis.
  12. KDD 2004.
  13. """
  14. def __init__(self, n_clusters=3, max_iter=50, tol=1e-3, random_state=None,
  15. kernel="linear", gamma=None, degree=3, coef0=1,
  16. kernel_params=None, verbose=0):
  17. self.n_clusters = n_clusters
  18. self.max_iter = max_iter
  19. self.tol = tol
  20. self.random_state = random_state
  21. self.kernel = kernel
  22. self.gamma = gamma
  23. self.degree = degree
  24. self.coef0 = coef0
  25. self.kernel_params = kernel_params
  26. self.verbose = verbose
  27. @property
  28. def _pairwise(self):
  29. return self.kernel == "precomputed"
  30. def _get_kernel(self, X, Y=None):
  31. if callable(self.kernel):
  32. params = self.kernel_params or {}
  33. else:
  34. params = {"gamma": self.gamma,
  35. "degree": self.degree,
  36. "coef0": self.coef0}
  37. return pairwise_kernels(X, Y, metric=self.kernel,
  38. filter_params=True, **params)
  39. def fit(self, X, y=None, sample_weight=None):
  40. n_samples = X.shape[0]
  41. K = self._get_kernel(X)
  42. sw = sample_weight if sample_weight else np.ones(n_samples)
  43. self.sample_weight_ = sw
  44. rs = check_random_state(self.random_state)
  45. self.labels_ = rs.randint(self.n_clusters, size=n_samples)
  46. dist = np.zeros((n_samples, self.n_clusters))
  47. self.within_distances_ = np.zeros(self.n_clusters)
  48. for it in range(self.max_iter):
  49. dist.fill(0)
  50. self._compute_dist(K, dist, self.within_distances_,
  51. update_within=True)
  52. labels_old = self.labels_
  53. self.labels_ = dist.argmin(axis=1)
  54. # Compute the number of samples whose cluster did not change
  55. # since last iteration.
  56. n_same = np.sum((self.labels_ - labels_old) == 0)
  57. if 1 - float(n_same) / n_samples < self.tol:
  58. if self.verbose:
  59. print("Converged at iteration {}".format(it + 1))
  60. break
  61. self.X_fit_ = X
  62. return self
  63. def _compute_dist(self, K, dist, within_distances, update_within):
  64. """Compute a n_samples x n_clusters distance matrix using the
  65. kernel trick."""
  66. sw = self.sample_weight_
  67. for j in range(self.n_clusters):
  68. mask = self.labels_ == j
  69. if np.sum(mask) == 0:
  70. raise ValueError("Empty cluster found, try smaller n_cluster.")
  71. denom = sw[mask].sum()
  72. denomsq = denom * denom
  73. if update_within:
  74. KK = K[mask][:, mask] # K[mask, mask] does not work.
  75. dist_j = np.sum(np.outer(sw[mask], sw[mask]) * KK / denomsq)
  76. within_distances[j] = dist_j
  77. dist[:, j] += dist_j
  78. else:
  79. dist[:, j] += within_distances[j]
  80. dist[:, j] -= 2 * np.sum(sw[mask] * K[:, mask], axis=1) / denom
  81. def predict(self, X):
  82. K = self._get_kernel(X, self.X_fit_)
  83. n_samples = X.shape[0]
  84. dist = np.zeros((n_samples, self.n_clusters))
  85. self._compute_dist(K, dist, self.within_distances_,
  86. update_within=False)
  87. return dist.argmin(axis=1)
  88. if __name__ == '__main__':
  89. from sklearn.datasets import make_blobs
  90. X, y = make_blobs(n_samples=1000, centers=5, random_state=0)
  91. km = KernelKMeans(n_clusters=5, max_iter=100, random_state=0, verbose=1)
  92. print(km.fit_predict(X)[:10])
  93. print(km.predict(X[:10]))

代码出处:https://github.com/cvjena/ITAL/blob/b7763fa8cf2aa5c5dc151d5e3a1a905223b45b3b/ital/external/kernel_kmeans.py

声明:本文内容由网友自发贡献,不代表【wpsshop博客】立场,版权归原作者所有,本站不承担相应法律责任。如您发现有侵权的内容,请联系我们。转载请注明出处:https://www.wpsshop.cn/w/Monodyee/article/detail/159628
推荐阅读
相关标签
  

闽ICP备14008679号