赞
踩
自己造数据的能力是有限的,咱们当然可以借助外部的数据进行练习。本次的数据源为 https://github.com/Jack-Cherish/Machine-Learning/blob/master/kNN/2.%E6%B5%B7%E4%BC%A6%E7%BA%A6%E4%BC%9A/datingTestSet.txt ,可以下载下来试用。
import operator

import numpy as np

# Maps the class name found in the last column of the data file to the
# integer label used by the classifier.
_LABEL_CODES = {"didntLike": 1, "largeDoses": 2, "smallDoses": 3}


# Read the file data
def fileread(filem):
    """Read a tab-separated data file into a feature matrix and a label list.

    Each non-blank line must hold at least three numeric columns followed by
    a class name in the last column ("didntLike", "largeDoses" or
    "smallDoses").  Only the first three columns are used as features.

    Args:
        filem: Path of the file to read.

    Returns:
        A tuple ``(dataH, lable)`` where ``dataH`` is a float array of shape
        ``(rows, 3)`` and ``lable`` is a list of integer labels (1, 2 or 3),
        one per row of ``dataH``.

    Raises:
        ValueError: If a line has fewer than four columns, a feature is not
            numeric, or the class name is not one of the known names.
    """
    rows = []
    lable = []
    # The context manager guarantees the file is closed even on error.
    with open(filem) as f_open:
        # Process the data so that it matches what the classifier expects.
        for lineno, ha in enumerate(f_open, start=1):
            f_qukong = ha.strip()
            if not f_qukong:
                # Blank lines (e.g. a trailing newline) carry no sample.
                continue
            f_zhibaio = f_qukong.split('\t')
            if len(f_zhibaio) < 4:
                raise ValueError(
                    f"{filem}:{lineno}: expected 3 features and a label, "
                    f"got {len(f_zhibaio)} column(s)"
                )
            name = f_zhibaio[-1]
            if name not in _LABEL_CODES:
                # Skipping the label silently would leave the label list
                # shorter than the feature matrix and mislabel later rows.
                raise ValueError(f"{filem}:{lineno}: unknown label {name!r}")
            rows.append([float(value) for value in f_zhibaio[:3]])
            lable.append(_LABEL_CODES[name])
    # reshape keeps the (0, 3) shape for a file without any samples.
    dataH = np.array(rows, dtype=float).reshape(-1, 3)
    return dataH, lable


# Build the classifier
def classfen(test, train, labe, k):
    """Classify one sample by majority vote among its k nearest neighbours.

    Distances are plain Euclidean distances between ``test`` and every row
    of ``train``.

    Args:
        test: Feature vector of the sample to classify.
        train: 2-D array-like of training samples, one per row.
        labe: Sequence of labels; ``labe[i]`` belongs to ``train[i]``.
        k: Number of nearest neighbours that take part in the vote.

    Returns:
        The label with the most votes.  On a tie, the label that was met
        first while walking the neighbours from nearest to farthest wins.

    Raises:
        IndexError: If ``k`` is smaller than 1 or larger than the number of
            training rows.
    """
    train = np.asarray(train, dtype=float)
    # Broadcasting subtracts ``test`` from every row without the explicit
    # tiled copy.
    xiangjian = np.asarray(test, dtype=float) - train
    kaifang = np.sqrt((xiangjian ** 2).sum(axis=1))
    paixu = kaifang.argsort()
    count = {}
    for i in range(k):
        lable = labe[paixu[i]]
        count[lable] = count.get(lable, 0) + 1
    # sorted() is stable, so equal vote counts keep first-seen order.
    so = sorted(count.items(), key=operator.itemgetter(1), reverse=True)
    return so[0][0]


def main():
    """Plot the data set with one test point, then classify that point."""
    # The plotting stack is only needed when running the demo, so it is
    # imported here; fileread/classfen stay importable without matplotlib.
    import matplotlib.colors  # noqa: F401  (retained from the original script)
    import matplotlib.pyplot as plt

    file = 'data.txt'
    gro, ll = fileread(file)
    te = [60000, 5, 800]
    # Scatter the first two features, coloured by class label.
    plt.scatter(gro[:, 0], gro[:, 1], c=ll)
    # Mark the test sample with a large red star.
    plt.scatter(te[0:1], te[1:2], marker='*', c='red', s=400)
    plt.show()
    tw = classfen(te, gro, ll, 3)
    print(te)
    print(te[0:1])
    # The predicted label used to be computed and then thrown away.
    print(tw)


if __name__ == '__main__':
    main()
最近较忙,如有疑问,欢迎在博客留言。
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。