当前位置:   article > 正文

python进行聚类(scikit-learn、scipy)_threecircles聚类数据集下载

threecircles聚类数据集下载
用于聚类的数据集
  1. %matplotlib inline
  2. import scipy.io as sio
  3. import matplotlib.pyplot as plt
  4. '''
  5. 各种聚类数据
  6. '''
  def two_cluster():
      """Load the two-cluster sample set from ``cluster_data/two_cluster.mat``.

      The ``X`` variable stored in the MAT-file is transposed before it is
      returned, so every row of the result is one column of the stored
      matrix.

      Returns:
          numpy.ndarray: the transposed ``X`` matrix.  ``show_all`` reads
          columns 0 and 1 as plot coordinates and column 2 as the colour
          value.
      """
      path = u'cluster_data/two_cluster.mat'
      return sio.loadmat(path)['X'].T
  def three_cluster():
      """Load the three-cluster sample set from ``cluster_data/three_cluster.mat``.

      The ``X`` variable stored in the MAT-file is transposed before it is
      returned, so every row of the result is one column of the stored
      matrix.

      Returns:
          numpy.ndarray: the transposed ``X`` matrix.  ``show_all`` reads
          columns 0 and 1 as plot coordinates and column 2 as the colour
          value.
      """
      path = u'cluster_data/three_cluster.mat'
      return sio.loadmat(path)['X'].T
  def five_cluster():
      """Load the five-cluster sample set from ``cluster_data/five_cluster.mat``.

      The MAT-file keeps the data in two variables, ``x`` and ``y``.  Per the
      original author's notes ``x`` has two rows and n columns and ``y`` has
      one row and n columns.  The two are stacked vertically and the stack
      is transposed.

      Returns:
          numpy.ndarray: one row per stored column, holding the ``x`` values
          followed by the ``y`` value.
      """
      # Local import: this listing never imports numpy at the top.
      import numpy as np

      path = u'cluster_data/five_cluster.mat'
      mat = sio.loadmat(path)
      # Stack vertically first, then transpose.
      return np.vstack((mat['x'], mat['y'])).T
  def spiral():
      """Load a thinned copy of the spiral set from ``cluster_data/spiral.mat``.

      Only every third row of the stored ``spiral`` matrix is kept (rows 0,
      3, 6, ...).  The columns are then reordered so that stored columns 1
      and 2 come first and stored column 0 comes last.

      Returns:
          numpy.ndarray: array with three columns.  ``show_all`` uses the
          last one as the colour value, so stored column 0 is presumably
          the class label (verify against the data file).
      """
      path = u'cluster_data/spiral.mat'
      # Keep one row out of every three.
      rows = sio.loadmat(path)['spiral'][0::3, :]
      # Index-list selection reorders the columns without needing numpy's
      # namespace, which this listing never imports.
      return rows[:, [1, 2, 0]]
  def spiral_unbalance():
      """Load a thinned copy of ``cluster_data/spiral_unbalance.mat``.

      Only every third row of the stored ``spiral_unbalance`` matrix is kept
      (rows 0, 3, 6, ...).  The columns are then reordered so that stored
      columns 1 and 2 come first and stored column 0 comes last.

      Returns:
          numpy.ndarray: array with three columns.  ``show_all`` uses the
          last one as the colour value, so stored column 0 is presumably
          the class label (verify against the data file).
      """
      path = u'cluster_data/spiral_unbalance.mat'
      # Keep one row out of every three.
      rows = sio.loadmat(path)['spiral_unbalance'][0::3, :]
      # Index-list selection reorders the columns without needing numpy's
      # namespace, which this listing never imports.
      return rows[:, [1, 2, 0]]
  def ThreeCircles():
      """Load a thinned copy of ``cluster_data/ThreeCircles.mat``.

      Only every third row of the stored ``ThreeCircles`` matrix is kept
      (rows 0, 3, 6, ...).  The columns are then reordered so that stored
      columns 1 and 2 come first and stored column 0 comes last.

      Returns:
          numpy.ndarray: array with three columns.  ``show_all`` uses the
          last one as the colour value, so stored column 0 is presumably
          the class label (verify against the data file).
      """
      path = u'cluster_data/ThreeCircles.mat'
      # Keep one row out of every three.
      rows = sio.loadmat(path)['ThreeCircles'][0::3, :]
      # Index-list selection reorders the columns without needing numpy's
      # namespace, which this listing never imports.
      return rows[:, [1, 2, 0]]
  def Twomoons(show=True):
      """Load a thinned copy of ``cluster_data/Twomoons.mat`` and optionally plot it.

      Only every third row of the stored ``Twomoons`` matrix is kept (rows
      0, 3, 6, ...).  The columns are then reordered so that stored columns
      1 and 2 come first and stored column 0 comes last.

      Args:
          show: when true (the default, matching the historical behaviour)
              the points are also drawn with ``plt.scatter`` on the current
              axes.  Pass ``False`` to get a pure loader like the sibling
              functions.

      Returns:
          numpy.ndarray: array with three columns; the last one is used as
          the colour value of the scatter plot.
      """
      path = u'cluster_data/Twomoons.mat'
      # Keep one row out of every three.
      rows = sio.loadmat(path)['Twomoons'][0::3, :]
      data = rows[:, [1, 2, 0]]
      if show:
          # Retained so that existing callers still get the plot.
          plt.scatter(data[:, 0], data[:, 1], c=data[:, 2])
      return data
  def Twomoons1():
      """Load a thinned copy of ``cluster_data/Twomoons.mat`` without plotting.

      Reads the same file and variable as ``Twomoons``.  Only every third
      row of the stored ``Twomoons`` matrix is kept (rows 0, 3, 6, ...).
      The columns are then reordered so that stored columns 1 and 2 come
      first and stored column 0 comes last.

      Returns:
          numpy.ndarray: array with three columns.  ``show_all`` uses the
          last one as the colour value.
      """
      path = u'cluster_data/Twomoons.mat'
      # Keep one row out of every three.
      rows = sio.loadmat(path)['Twomoons'][0::3, :]
      # Index-list selection reorders the columns without needing numpy's
      # namespace, which this listing never imports.
      return rows[:, [1, 2, 0]]
  def test():
      """Print the literal string ``test`` to standard output."""
      # Call form works under both Python 2 and Python 3.
      print('test')
  def show_all():
      """Draw all eight sample data sets in a 2 x 4 grid of scatter plots.

      A 16 x 8 figure is created, every loader is called once, and the
      results are stored in the module-level ``data_list``.  Each data set
      is then drawn in its own subplot, using columns 0 and 1 as
      coordinates and column 2 as the colour value.
      """
      # Reference the loaders directly instead of eval()-ing their names.
      loaders = (two_cluster, three_cluster, five_cluster, spiral,
                 spiral_unbalance, ThreeCircles, Twomoons, Twomoons1)
      plt.figure(figsize=(16, 8))
      # Slice assignment refreshes the shared list in place, so calling
      # show_all() again does not leave stale entries in front.
      data_list[:] = [load() for load in loaders]
      for position, data in enumerate(data_list, 1):
          plt.subplot(2, 4, position)
          plt.scatter(data[:, 0], data[:, 1], c=data[:, 2])


  # Shared container filled by show_all().
  data_list = []
  show_all()


 
使用scikit的kmeans进行聚类
  1. %matplotlib inline
  2. import scipy.io as sio
  3. #matlab文件名
  4. two_cluster=u'cluster_data/two_cluster.mat'
  5. data=sio.loadmat(two_cluster)
  6. print data
  1. %matplotlib inline
  2. import matplotlib.pyplot as plt
  3. x = data['X']
  4. cValue = x[2]
  5. plt.scatter(x[0],x[1],c=cValue)
  from sklearn import cluster, datasets

  # Features: the first two rows of ``x``, transposed to one sample per row.
  # Plain slicing avoids ``np``, which no earlier cell imports.
  b = x[0:2].T
  y_pred = cluster.KMeans(n_clusters=2, random_state=170).fit_predict(b)
  cValue = x[2]
  # Colour the points by the k-means assignment rather than by ``cValue``.
  plt.scatter(x[0], x[1], c=y_pred)

数据集下载

scikit-learn教程


  1. %matplotlib inline
  2. import scipy.io as sio
  3. #matlab文件名
  4. two_cluster=u'cluster_data/spiral.mat'
  5. spiral=sio.loadmat(two_cluster)['spiral']
  6. spiral = spiral[0::3,:] #每隔3行取一个数据
  7. print len(spiral),len(spiral[0])
  8. cValue = spiral[:,0]
  9. print cValue.shape
  10. color = ['b','y']
  11. cValue = [color[int(i)] for i in list(cValue)]
  12. plt.scatter(spiral[:,1],spiral[:,2],c=cValue)


使用kmeans结果

  from sklearn import cluster, datasets

  # Cluster on columns 1 and 2 of ``spiral`` (column 0 is not a feature
  # here) and colour the points by the k-means assignment.
  y_pred = cluster.KMeans(n_clusters=2, random_state=170).fit_predict(spiral[:, 1:3])
  plt.scatter(spiral[:, 1], spiral[:, 2], c=y_pred)


使用scipy进行聚类效果

  1. # -*- coding: utf8 -*-
  2. %matplotlib inline
  3. import scipy.io as sio
  4. import matplotlib.pyplot as plt
  5. import scipy.cluster.hierarchy as hcluster
  6. from sklearn.cluster import AgglomerativeClustering
  7. import numpy.random as random  
  8. import numpy as np  
  9. import numpy.core.fromnumeric  
  def loadData():
      """Load the thinned spiral data, print its size, plot it and return it.

      Reads the ``spiral`` variable from ``cluster_data/spiral.mat`` and
      keeps every third row.  The row/column count and the shape of column 0
      are printed, then columns 1 and 2 are drawn as a scatter plot coloured
      by column 0.

      Returns:
          numpy.ndarray: the thinned ``spiral`` matrix (earlier versions
          returned nothing, which left the loaded data unreachable).
      """
      path = u'cluster_data/spiral.mat'
      # Keep one row out of every three.
      spiral = sio.loadmat(path)['spiral'][0::3, :]
      # Formatted so the output is identical under Python 2 and 3.
      print('%d %d' % (len(spiral), len(spiral[0])))
      labels = spiral[:, 0]
      print(labels.shape)
      # Column 0 indexes into the palette, so its values are assumed to be
      # 0 or 1 (confirm against the data file).
      palette = ['b', 'y']
      point_colors = [palette[int(label)] for label in labels]
      plt.scatter(spiral[:, 1], spiral[:, 2], c=point_colors)
      return spiral
  def spiralSample(spiral=None):
      """Compare hierarchical clustering from scipy and scikit-learn on the spiral data.

      Three side-by-side subplots are drawn: the data coloured by column 0,
      the result of ``scipy.cluster.hierarchy.fclusterdata`` and the result
      of scikit-learn's ``AgglomerativeClustering`` with Ward linkage.  Both
      algorithms cluster on columns 1 and 2.

      Args:
          spiral: matrix whose column 0 holds the colour value and whose
              columns 1 and 2 hold the coordinates.  When omitted, every
              third row of ``cluster_data/spiral.mat`` is loaded, so the
              function no longer depends on a global left behind by an
              earlier notebook cell.
      """
      if spiral is None:
          spiral = sio.loadmat(u'cluster_data/spiral.mat')['spiral'][0::3, :]
      points = spiral[:, 1:3]

      plt.subplot(131)
      plt.title(u'origal data')
      plt.scatter(spiral[:, 1], spiral[:, 2], c=spiral[:, 0])

      # scipy: with criterion='maxclust', t is the maximum number of flat
      # clusters to form (here two), not a distance threshold.
      # http://docs.scipy.org/doc/scipy/reference/generated/scipy.cluster.hierarchy.fcluster.html#scipy.cluster.hierarchy.fcluster
      y_pred = hcluster.fclusterdata(points, criterion='maxclust', t=2)
      plt.subplot(132)
      plt.title(u'use scipy to hierarchy cluster')
      plt.scatter(spiral[:, 1], spiral[:, 2], c=y_pred)

      # scikit-learn: agglomerative clustering into two clusters.
      plt.subplot(133)
      plt.title(u'use scikit to hierarchy cluster')
      y_pred = AgglomerativeClustering(n_clusters=2, linkage='ward').fit_predict(points)
      plt.scatter(spiral[:, 1], spiral[:, 2], c=y_pred)
      plt.show()


  spiralSample()




声明:本文内容由网友自发贡献,不代表【wpsshop博客】立场,版权归原作者所有,本站不承担相应法律责任。如您发现有侵权的内容,请联系我们。转载请注明出处:https://www.wpsshop.cn/w/繁依Fanyi0/article/detail/774678
推荐阅读
相关标签
  

闽ICP备14008679号