赞
踩
import numpy as np


def distance(vex1, vex2):
    """Return the Euclidean distance between two vectors of equal shape."""
    return np.sqrt(np.sum(np.power(vex1 - vex2, 2)))


def kMeans_way(S, k, distMeas=distance):
    """Cluster the rows of ``S`` into ``k`` groups with Lloyd's k-means.

    Centroids are initialised uniformly at random inside the bounding box
    of the data (using the global ``np.random`` state, so seeding with
    ``np.random.seed`` makes a run reproducible).

    Args:
        S: 2-D array-like of shape (m, n); one sample per row.
        k: Number of clusters (positive integer).
        distMeas: Callable ``(center, sample) -> distance`` used for the
            assignment step.

    Returns:
        A tuple ``(clusterCenter, sampleTag, SSE)``:
        ``clusterCenter`` is a (k, n) ``np.matrix`` of centroids,
        ``sampleTag`` is a float array of length m holding each sample's
        cluster index, and ``SSE`` is the sum of squared distances of the
        samples to their assigned centroid.

    Raises:
        ValueError: If ``S`` is not a non-empty 2-D array or ``k < 1``.
    """
    S = np.asarray(S, dtype=float)
    if S.ndim != 2 or S.shape[0] == 0:
        raise ValueError("S must be a non-empty 2-D array of samples")
    if k < 1:
        raise ValueError("k must be a positive integer")
    m, n = S.shape

    lower = S.min(axis=0)
    span = S.max(axis=0) - lower
    # rand(n, k).T draws the numbers feature by feature, i.e. in the same
    # order as a per-column initialisation loop would.
    centers = lower + span * np.random.rand(n, k).T

    # Start from an impossible label so the first pass always counts as a
    # change; otherwise the loop could stop with the SSE still measured
    # against the random initial centers.
    sampleTag = np.full(m, -1.0)
    SSE = 0.0
    changed = True
    while changed:
        changed = False
        SSE = 0.0
        for i in range(m):
            best_dist = np.inf
            best_index = -1
            for j in range(k):
                d = distMeas(centers[j], S[i])
                if d < best_dist:
                    best_dist = d
                    best_index = j
            if sampleTag[i] != best_index:
                changed = True
            sampleTag[i] = best_index
            SSE += best_dist ** 2
        for j in range(k):
            members = S[sampleTag == j]
            # An empty cluster keeps its previous center: the mean of zero
            # rows is NaN and would poison every later distance.
            if members.size:
                centers[j] = members.mean(axis=0)

    # np.mat was removed in NumPy 2.0; np.asmatrix keeps the historical
    # matrix return type for existing callers.
    return np.asmatrix(centers), sampleTag, float(SSE)


def draw_pic(samples, clusterCenter, sampleTag):
    """Scatter-plot the first two features of each cluster and the centroids.

    Args:
        samples: (m, n) array of samples, n >= 2.
        clusterCenter: (k, n) array or matrix of centroids.
        sampleTag: Length-m array of cluster indices for ``samples``.
    """
    # Imported lazily so the clustering functions work without matplotlib.
    import matplotlib.pyplot as plt

    samples = np.asarray(samples)
    sampleTag = np.asarray(sampleTag)
    centers = np.asarray(clusterCenter)

    plt.rcParams['font.sans-serif'] = ['SimHei']
    plt.rcParams['axes.unicode_minus'] = False
    markers = ['sg', 'py', 'ob', 'pk']
    for i in range(len(centers)):
        data_pos = samples[sampleTag == i]
        # Cycle through the styles so more than four clusters can be drawn.
        plt.plot(data_pos[:, 0], data_pos[:, 1], markers[i % len(markers)])
    plt.plot(centers[:, 0], centers[:, 1], "r*", markersize=20)
    plt.title('鸢尾花')
    plt.show()


def predict(datas, centroids, k):
    """Return the index of the nearest centroid for every row of ``datas``.

    Args:
        datas: (N, n) array-like of points.
        centroids: (k, n) array or ``np.matrix`` of centroids.
        k: Kept for backward compatibility; the number of centroids is
            taken from ``centroids`` itself.

    Returns:
        Integer array of length N with the nearest-centroid index
        (squared Euclidean distance, first index wins ties).
    """
    cents = np.asarray(centroids, dtype=float)
    points = np.asarray(datas, dtype=float).reshape(-1, cents.shape[1])
    sq_dist = ((points[:, np.newaxis, :] - cents[np.newaxis, :, :]) ** 2).sum(axis=2)
    return sq_dist.argmin(axis=1)


def draw_range(data, centers, k, step=0.02):
    """Draw the decision regions of the centroids as filled contours.

    Args:
        data: (m, n) array; the first two columns define the plotted area
            (padded by 1 on every side).
        centers: (k, 2) centroids passed to ``predict``.
        k: Number of centroids (forwarded to ``predict``).
        step: Grid resolution of the evaluated mesh.
    """
    import matplotlib.pyplot as plt

    x_min, x_max = data[:, 0].min() - 1, data[:, 0].max() + 1
    y_min, y_max = data[:, 1].min() - 1, data[:, 1].max() + 1
    xx, yy = np.meshgrid(np.arange(x_min, x_max, step),
                         np.arange(y_min, y_max, step))
    z = predict(np.c_[xx.ravel(), yy.ravel()], centers, k).reshape(xx.shape)
    plt.contourf(xx, yy, z)


def main(k=4, n_restarts=50, data_path=None):
    """Run k-means several times and plot the lowest-SSE clustering.

    Args:
        k: Number of clusters.
        n_restarts: Number of random restarts; the run with the smallest
            SSE is plotted.
        data_path: Space-delimited text file of samples. When ``None`` the
            first two features of the iris data set are used.

    Raises:
        ValueError: If no run produced a usable (finite-SSE) clustering.
    """
    print("----------ing-------------")
    if data_path is None:
        # scikit-learn is only needed for the built-in demo data.
        from sklearn.datasets import load_iris
        data = load_iris().data[:, :2]
    else:
        data = np.genfromtxt(data_path, delimiter=" ")

    best_loss = np.inf
    best_centers = None
    best_tags = None
    for _ in range(n_restarts):
        clusterCenter, sampleTag, sse = kMeans_way(data, k)
        if sse < best_loss:
            best_loss = sse
            best_centers = clusterCenter
            best_tags = sampleTag
    if best_centers is None:
        raise ValueError("no k-means run produced a finite SSE")

    clusterCenter = np.array(best_centers)
    draw_range(data, clusterCenter, k)
    draw_pic(data, clusterCenter, best_tags)


if __name__ == "__main__":
    main()
import numpy as np


def distance(vex1, vex2):
    """Return the Euclidean distance between two vectors of equal shape."""
    return np.sqrt(np.sum(np.power(vex1 - vex2, 2)))


def kMeans_way(S, k, distMeas=distance):
    """Cluster the rows of ``S`` into ``k`` groups with Lloyd's k-means.

    Centroids are initialised uniformly at random inside the bounding box
    of the data (using the global ``np.random`` state, so seeding with
    ``np.random.seed`` makes a run reproducible).

    Args:
        S: 2-D array-like of shape (m, n); one sample per row.
        k: Number of clusters (positive integer).
        distMeas: Callable ``(center, sample) -> distance`` used for the
            assignment step.

    Returns:
        A tuple ``(clusterCenter, sampleTag, SSE)``:
        ``clusterCenter`` is a (k, n) ``np.matrix`` of centroids,
        ``sampleTag`` is a float array of length m holding each sample's
        cluster index, and ``SSE`` is the sum of squared distances of the
        samples to their assigned centroid.

    Raises:
        ValueError: If ``S`` is not a non-empty 2-D array or ``k < 1``.
    """
    S = np.asarray(S, dtype=float)
    if S.ndim != 2 or S.shape[0] == 0:
        raise ValueError("S must be a non-empty 2-D array of samples")
    if k < 1:
        raise ValueError("k must be a positive integer")
    m, n = S.shape

    lower = S.min(axis=0)
    span = S.max(axis=0) - lower
    # rand(n, k).T draws the numbers feature by feature, i.e. in the same
    # order as a per-column initialisation loop would.
    centers = lower + span * np.random.rand(n, k).T

    # Start from an impossible label so the first pass always counts as a
    # change; otherwise the loop could stop with the SSE still measured
    # against the random initial centers.
    sampleTag = np.full(m, -1.0)
    SSE = 0.0
    changed = True
    while changed:
        changed = False
        SSE = 0.0
        for i in range(m):
            best_dist = np.inf
            best_index = -1
            for j in range(k):
                d = distMeas(centers[j], S[i])
                if d < best_dist:
                    best_dist = d
                    best_index = j
            if sampleTag[i] != best_index:
                changed = True
            sampleTag[i] = best_index
            SSE += best_dist ** 2
        for j in range(k):
            members = S[sampleTag == j]
            # An empty cluster keeps its previous center: the mean of zero
            # rows is NaN and would poison every later distance.
            if members.size:
                centers[j] = members.mean(axis=0)

    # np.mat was removed in NumPy 2.0; np.asmatrix keeps the historical
    # matrix return type for existing callers.
    return np.asmatrix(centers), sampleTag, float(SSE)


def draw_pic(samples, clusterCenter, sampleTag):
    """Scatter-plot the first two features of each cluster and the centroids.

    Args:
        samples: (m, n) array of samples, n >= 2.
        clusterCenter: (k, n) array or matrix of centroids.
        sampleTag: Length-m array of cluster indices for ``samples``.
    """
    # Imported lazily so the clustering functions work without matplotlib.
    import matplotlib.pyplot as plt

    samples = np.asarray(samples)
    sampleTag = np.asarray(sampleTag)
    centers = np.asarray(clusterCenter)

    plt.rcParams['font.sans-serif'] = ['SimHei']
    plt.rcParams['axes.unicode_minus'] = False
    markers = ['sg', 'py', 'ob', 'pk']
    for i in range(len(centers)):
        data_pos = samples[sampleTag == i]
        # Cycle through the styles so more than four clusters can be drawn.
        plt.plot(data_pos[:, 0], data_pos[:, 1], markers[i % len(markers)])
    plt.plot(centers[:, 0], centers[:, 1], "r*", markersize=20)
    plt.title('鸢尾花')
    plt.show()


def predict(datas, centroids, k):
    """Return the index of the nearest centroid for every row of ``datas``.

    Args:
        datas: (N, n) array-like of points.
        centroids: (k, n) array or ``np.matrix`` of centroids.
        k: Kept for backward compatibility; the number of centroids is
            taken from ``centroids`` itself.

    Returns:
        Integer array of length N with the nearest-centroid index
        (squared Euclidean distance, first index wins ties).
    """
    cents = np.asarray(centroids, dtype=float)
    points = np.asarray(datas, dtype=float).reshape(-1, cents.shape[1])
    sq_dist = ((points[:, np.newaxis, :] - cents[np.newaxis, :, :]) ** 2).sum(axis=2)
    return sq_dist.argmin(axis=1)


def draw_range(data, centers, k, step=0.02):
    """Draw the decision regions of the centroids as filled contours.

    Args:
        data: (m, n) array; the first two columns define the plotted area
            (padded by 1 on every side).
        centers: (k, 2) centroids passed to ``predict``.
        k: Number of centroids (forwarded to ``predict``).
        step: Grid resolution of the evaluated mesh.
    """
    import matplotlib.pyplot as plt

    x_min, x_max = data[:, 0].min() - 1, data[:, 0].max() + 1
    y_min, y_max = data[:, 1].min() - 1, data[:, 1].max() + 1
    xx, yy = np.meshgrid(np.arange(x_min, x_max, step),
                         np.arange(y_min, y_max, step))
    z = predict(np.c_[xx.ravel(), yy.ravel()], centers, k).reshape(xx.shape)
    plt.contourf(xx, yy, z)


def main(k_values=range(2, 10), n_restarts=10, data_path="kmeans.txt"):
    """Plot the k-means loss (SSE) against k to look for an elbow.

    For every k the algorithm is restarted ``n_restarts`` times and the
    lowest SSE of those runs is recorded, so each plotted value belongs to
    its own k and is not distorted by a single unlucky initialisation.

    Args:
        k_values: Iterable of cluster counts to evaluate.
        n_restarts: Random restarts per k.
        data_path: Space-delimited text file with one sample per row.
    """
    import matplotlib.pyplot as plt

    print("----------ing-------------")
    data = np.genfromtxt(data_path, delimiter=" ")

    ks = list(k_values)
    loss_list = []
    for k in ks:
        best_loss = np.inf
        for _ in range(n_restarts):
            _, _, sse = kMeans_way(data, k)
            best_loss = min(best_loss, sse)
        loss_list.append(best_loss)
    print(loss_list)

    plt.plot(ks, loss_list)
    plt.xlabel("x")
    # The loss belongs on the y axis; a second xlabel call would merely
    # overwrite the x-axis label.
    plt.ylabel("loss")
    plt.show()


if __name__ == "__main__":
    main()
说明:损失曲线上有明显拐点处,对应的 k 即为最佳值。(不一定都存在最佳点)
1.658985 4.285136 -3.453687 3.424321 4.838138 -1.151539 -5.379713 -3.362104 0.972564 2.924086 -3.567919 1.531611 0.450614 -3.302219 -3.487105 -1.724432 2.668759 1.594842 -3.156485 3.191137 3.165506 -3.999838 -2.786837 -3.099354 4.208187 2.984927 -2.123337 2.943366 0.704199 -0.479481 -0.392370 -3.963704 2.831667 1.574018 -0.790153 3.343144 2.943496 -3.357075 -3.195883 -2.283926 2.336445 2.875106 -1.786345 2.554248 2.190101 -1.906020 -3.403367 -2.778288 1.778124 3.880832 -1.688346 2.230267 2.592976 -2.054368 -4.007257 -3.207066 2.257734 3.387564 -2.679011 0.785119 0.939512 -4.023563 -3.674424 -2.261084 2.046259 2.735279 -3.189470 1.780269 4.372646 -0.822248 -2.579316 -3.497576 1.889034 5.190400 -0.798747 2.185588 2.836520 -2.658556 -3.837877 -3.253815 2.096701 3.886007 -2.709034 2.923887 3.367037 -3.184789 -2.121479 -4.232586 2.329546 3.179764 -3.284816 3.273099 3.091414 -3.815232 -3.762093 -2.432191 3.542056 2.778832 -1.736822 4.241041 2.127073 -2.983680 -4.323818 -3.938116 3.792121 5.135768 -4.786473 3.358547 2.624081 -3.260715 -4.009299 -2.978115 2.493525 1.963710 -2.513661 2.642162 1.864375 -3.176309 -3.171184 -3.572452 2.894220 2.489128 -2.562539 2.884438 3.491078 -3.947487 -2.565729 -2.012114 3.332948 3.983102 -1.616805 3.573188 2.280615 -2.559444 -2.651229 -3.103198 2.321395 3.154987 -1.685703 2.939697 3.031012 -3.620252 -4.599622 -2.185829 4.196223 1.126677 -2.133863 3.093686 4.668892 -2.562705 -2.793241 -2.149706 2.884105 3.043438 -2.967647 2.848696 4.479332 -1.764772 -4.905566 -2.911070
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。