赞
踩
from scipy.cluster.hierarchy import dendrogram, fcluster, linkage

# Toy data set: five 2-D observations (rows 0 and 3 are identical).
SAMPLE_POINTS = [[1, 2], [3, 2], [4, 4], [1, 2], [1, 3]]


def cluster_points(points, threshold=4):
    """Cluster *points* hierarchically with Ward linkage.

    Args:
        points: Sequence of observations, one row per observation.
        threshold: Distance cut-off handed to ``fcluster`` with the
            ``'distance'`` criterion.

    Returns:
        A ``(tree, labels)`` tuple: the linkage matrix returned by
        ``linkage`` and the flat cluster label of every observation.
    """
    tree = linkage(points, 'ward')
    labels = fcluster(tree, threshold, 'distance')
    return tree, labels


def show_sample_dendrogram():
    """Draw the dendrogram of ``SAMPLE_POINTS`` in a 5x3 inch figure."""
    # Imported locally so that cluster_points stays usable when only SciPy
    # is installed.
    from matplotlib import pyplot as plt

    tree, _labels = cluster_points(SAMPLE_POINTS)
    plt.figure(figsize=(5, 3))
    dendrogram(tree)
    plt.show()


if __name__ == "__main__":
    show_sample_dendrogram()
import numpy as np
import pandas as pd
from scipy.cluster.hierarchy import dendrogram, fcluster, linkage


def average_linkage_clusters(features, n_clusters=4):
    """Cluster *features* with average linkage on Euclidean distances.

    Args:
        features: 2-D array-like, one row per observation.
        n_clusters: Upper bound on the number of flat clusters; passed to
            ``fcluster`` as ``t`` with ``criterion='maxclust'``.

    Returns:
        A ``(tree, labels)`` tuple: the linkage matrix and the flat cluster
        label of every observation.
    """
    tree = linkage(features, method='average', metric='euclidean')
    # fcluster also accepts other criteria such as 'inconsistent' and
    # 'distance'.
    labels = fcluster(tree, t=n_clusters, criterion='maxclust')
    return tree, labels


def show_dendrogram(tree, title):
    """Plot the dendrogram of *tree* in its own 5x3 inch figure."""
    # Imported locally so that the clustering helper stays usable when
    # matplotlib is not installed.
    from matplotlib import pyplot as plt

    # A fresh figure per plot: every dendrogram gets the same size instead
    # of only the first one drawn after an explicit plt.figure call.
    plt.figure(figsize=(5, 3))
    plt.title(title)
    dendrogram(tree)
    plt.show()


def run_scaled_clustering(csv_path, index_col, n_clusters=4):
    """Compare min-max scaling and standardization on one CSV data set.

    Reads *csv_path* using *index_col* as the row index, prints the first
    five rows, clusters the min-max scaled and the standardized data with
    average linkage, and shows one dendrogram for each scaling.

    Returns:
        A ``(minmax_labels, standard_labels)`` tuple so that neither flat
        clustering result is overwritten by the other.
    """
    from sklearn.preprocessing import MinMaxScaler, StandardScaler

    data = pd.read_csv(csv_path, index_col=index_col)
    print(data.head(5))

    # Min-max (range) scaling.
    mm_data = MinMaxScaler().fit_transform(data)
    # Standardization via StandardScaler.
    std_data = StandardScaler().fit_transform(data)

    mm_tree, mm_labels = average_linkage_clusters(mm_data, n_clusters)
    std_tree, std_labels = average_linkage_clusters(std_data, n_clusters)

    show_dendrogram(mm_tree, '极差标准化')
    show_dendrogram(std_tree, '标准化')
    return mm_labels, std_labels


if __name__ == "__main__":
    from pylab import mpl

    # Select the SimHei font so the Chinese plot titles can be rendered.
    mpl.rcParams['font.sans-serif'] = ['SimHei']

    run_scaled_clustering(r"WBClust2013.csv", 'Country')
    # Question 2
    run_scaled_clustering(r"NASAUnderstory.csv", 'Overstory Species')

方法二
import numpy as np
import pandas as pd
from scipy.cluster.hierarchy import dendrogram, fcluster, linkage

# Inter-cluster distance definitions to compare; the point-to-point metric
# is always Euclidean. Other metrics that could be tried with the non-ward
# methods: Manhattan ('cityblock'), 'cosine', 'chebyshev'.
LINKAGE_METHODS = ('single', 'complete', 'average', 'ward')


def linkage_clusters(features, method, threshold=4):
    """Cluster *features* hierarchically with the given linkage *method*.

    Args:
        features: 2-D array-like, one row per observation.
        method: Linkage method name passed to ``linkage``.
        threshold: Distance cut-off handed to ``fcluster`` with the
            ``'distance'`` criterion.

    Returns:
        A ``(tree, labels)`` tuple: the linkage matrix and the flat cluster
        label of every observation.
    """
    tree = linkage(np.asarray(features), method=method, metric='euclidean')
    labels = fcluster(tree, threshold, 'distance')
    return tree, labels


def plot_linkage_tree(tree, method):
    """Show the dendrogram of *tree* in a 14x8 inch figure titled by *method*."""
    # Imported locally so that linkage_clusters stays usable when matplotlib
    # is not installed.
    from matplotlib import pyplot as plt

    plt.figure(figsize=(14, 8))
    dendrogram(tree)
    plt.title(f"method={method},metric=euclidean")
    plt.show()


def compare_linkage_methods(features, methods=LINKAGE_METHODS):
    """Cluster and plot *features* once per linkage method.

    Returns:
        Dict mapping each method name to its flat cluster labels.
    """
    labels_by_method = {}
    for method in methods:
        tree, labels = linkage_clusters(features, method)
        labels_by_method[method] = labels
        plot_linkage_tree(tree, method)
    return labels_by_method


def run_linkage_comparison():
    """Run both assignments: WBClust2013 first, then NASAUnderstory."""
    from sklearn.preprocessing import StandardScaler

    data = pd.read_csv(r"./WBClust2013.csv", header=0, index_col=0)
    print('--------------作业1--------------')
    # Preprocessing: standardize the columns with StandardScaler.
    ss = StandardScaler()
    data = ss.fit_transform(data)

    # Ward linkage on its own first ...
    ward_tree, _ward_labels = linkage_clusters(data, 'ward')
    plot_linkage_tree(ward_tree, 'ward')
    # ... then every inter-cluster distance definition in turn.
    compare_linkage_methods(data)

    print('--------------作业2--------------')
    # Column 1 becomes the row index; the 'Labels' column is dropped before
    # scaling.
    data1 = pd.read_csv(r"./NASAUnderstory.csv", header=0, index_col=1)
    data1 = data1.drop('Labels', axis=1)
    data1 = ss.fit_transform(data1)
    compare_linkage_methods(data1)


if __name__ == "__main__":
    run_linkage_comparison()

Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。