赞
踩
# Baseline experiment: train a linear-kernel SVM directly on the Olivetti face
# features (no dimensionality reduction) and score it on a held-out split.
import math

import cv2
import matplotlib.pyplot as plt
import numpy as np
from sklearn.datasets import fetch_olivetti_faces
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier

data = fetch_olivetti_faces()
x = data.data    # feature matrix handed to the classifier
y = data.target  # class label for each sample

# Split the raw features. The original passed `x_copy` here, a name that is
# never defined in this snippet, so the script died with a NameError before
# any training happened.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.33, random_state=15
)

classifier = SVC(kernel='linear')
# `history` is simply the value returned by fit(); the name is kept so any
# later code that refers to it keeps working.
history = classifier.fit(x_train, y_train)
# Mean accuracy on the held-out 33% of the samples.
score = classifier.score(x_test, y_test)
# Experiment: sweep the fraction of information PCA is asked to keep, train a
# linear-kernel SVM on each reduced dataset, and plot test accuracy against
# the number of dimensions that remain.
import math

import cv2
import matplotlib.pyplot as plt
import numpy as np
from sklearn.datasets import fetch_olivetti_faces
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier

data = fetch_olivetti_faces()
x = data.data
y = data.target

# Try keeping from 50% up to 99% of the data's information, in 20 even steps.
feature_ratio = np.linspace(0.5, 0.99, 20)

# Dimensionality of the data after each PCA reduction.
x_shape = []
# Test-set score obtained for each reduction.
scores = []

for ratio in feature_ratio:
    pca = PCA(n_components=ratio)
    # NOTE(review): PCA is fitted on the full dataset before the train/test
    # split below, so the test samples also influence the projection.
    x_copy = pca.fit_transform(x)

    # Same test_size and random_state on every iteration.
    x_train, x_test, y_train, y_test = train_test_split(
        x_copy, y, test_size=0.33, random_state=15
    )

    classifier = SVC(kernel='linear')
    history = classifier.fit(x_train, y_train)
    score = classifier.score(x_test, y_test)

    x_shape.append(x_copy.shape[1])
    scores.append(score)

plt.plot(x_shape, scores)
plt.xlabel('number of features')
plt.ylabel('accuracy')
plt.show()
# Finer sweep: set the PCA component count explicitly to each value from 35
# to 59 and record the linear-SVM test accuracy for each.
# Relies on `x`, `y`, `PCA`, `SVC`, `train_test_split` and `plt` already being
# defined by the earlier part of the script.
x_shape = []  # number of dimensions kept on each run
scores = []   # matching test-set score

for n_kept in range(35, 60):
    pca = PCA(n_components=n_kept)
    # NOTE(review): as in the sweep above, PCA is fitted on the full dataset
    # before the train/test split.
    x_copy = pca.fit_transform(x)

    x_train, x_test, y_train, y_test = train_test_split(
        x_copy, y, test_size=0.33, random_state=15
    )

    classifier = SVC(kernel='linear')
    history = classifier.fit(x_train, y_train)
    score = classifier.score(x_test, y_test)

    x_shape.append(x_copy.shape[1])
    scores.append(score)

plt.plot(x_shape, scores)
plt.xlabel('number of features')
plt.ylabel('accuracy')
plt.show()
用 PCA 把原数据降维成 35 维
使用 explained_variance_ratio_ 属性来查看保留下来的每个主成分的贡献率(注意:explained_variance_ 给出的是方差的绝对值,贡献率应使用带 _ratio 的属性)
然后使用 np.cumsum 对所有保留主成分的贡献率进行累加,累加结果的最后一项就是保留下来的主成分占全部特征信息的比例,为 66.13649 %
反过来再验证一下:让 PCA 保留 66.13649 % 的信息,看是否真的能达到同样高的分数;检验成功。
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。