// 赞
// 踩
// knn的C++实现,有不对的地方请各位指正!谢谢!
#include <algorithm>
#include <cmath>
#include <cstddef>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <sstream>
#include <string>
#include <utility>
#include <vector>
using namespace std;

// One row of float values: a feature vector, or a list of distances.
// NOTE(review): the template arguments were missing from this file; `float`
// is restored here because getDist() returns float and sortDist() swaps the
// elements through a float temporary.
using Tuple = vector<float>;

const int k = 4;           // number of nearest neighbours that vote in RunKnn
const int dims = 3;        // number of feature values per sample
const int categories = 3;  // class labels are 1, 2, 3

// Classifies testData against the first `count` training rows; returns the winning label.
int RunKnn(vector<Tuple>& trainData, vector<int>& label, Tuple& testData, int count);
// Squared Euclidean distance over the first `dims` components.
float getDist(Tuple& trainData, Tuple& testData);
// Sorts the first `count` distances ascending, permuting `label` in step.
void sortDist(Tuple& dists, vector<int>& label, int count);
// Prints the first `count` distances on one line.
void showDist(Tuple& dists, int count);
int main()
{
// 准备训练数据
cout << "Preparing the Trainning data----" << endl;
string fileName = "datingTestSet.txt";
fstream file(fileName);
if (!file)
{
cout << "can not open the file " << endl;
return 0;
}
vector trainData;
vector label;
int count = 0;
while(!file.eof())
{
string str;
getline(file,str);
stringstream ss(str);
ss << str;
Tuple temp(dims,0);
for(int i = 0; i < dims; i++)
ss >> temp[i];
trainData.push_back(temp);
int t;
ss >> t;
label.push_back(t);
count ++;
}
//训练数据归一化
Tuple maxnum(dims,0);
Tuple minnum(dims,0);
for (int i = 0; i < count; i++)
{
for (int j = 0; j < dims; j++)
{
if (trainData[i][j] > maxnum[j])
maxnum[j] = trainData[i][j];
if (trainData[i][j] < minnum[j])
minnum[j] = trainData[i][j];
}
}
float chazhi;
for (int i = 0; i < dims; i++)
{
chazhi = maxnum[i] - minnum[i];
for (int j = 0; j < count; j++)
trainData[j][i] = (trainData[j][i] - minnum[i])/chazhi;
}
//准备测试数据
cout << "Preparing the testing data-----" << endl;
int ind = 80;
Tuple testData = trainData[ind];
int truthLabel = label[ind];
//knn开始
cout << "Running" << endl;
int testLabel = RunKnn(trainData,label,testData,count);
cout << "testLabel: " << testLabel << endl;
cout << "truthLabel: " << truthLabel << endl;
if(testLabel == truthLabel)
cout << "Yes!" << endl;
else
cout << "No!" << endl;
system("pause");
return 0;
}
// Returns the squared Euclidean distance between the first `dims` components
// of the two tuples. No square root is taken, so the value is only meaningful
// for comparing distances against each other.
float getDist(Tuple& trainData, Tuple& testData)
{
    float sumOfSquares = 0;
    int axis = 0;
    while (axis < dims)
    {
        const auto delta = trainData[axis] - testData[axis];
        sumOfSquares += delta * delta;
        ++axis;
    }
    return sumOfSquares;
}
// Sorts the first `count` entries of `dists` into ascending order and applies
// the same permutation to `label`, so label[i] still belongs to dists[i]
// afterwards.
//
// The previous hand-written bubble sort never compared the last element
// (its inner bound was `count - i - 1` with `i` starting at 1), leaving the
// result only partially sorted.
//
// `count` is clamped to the sizes of both containers; a non-positive count is
// a no-op. Entries with equal distance keep their relative order. NaN
// distances are placed after every real distance.
void sortDist(Tuple& dists, vector<int>& label, int count)
{
    if (count <= 0)
        return;

    std::size_t n = static_cast<std::size_t>(count);
    n = std::min(n, std::min(dists.size(), label.size()));

    // Pair every distance with its label so both move together.
    std::vector<std::pair<float, int> > paired;
    paired.reserve(n);
    for (std::size_t i = 0; i < n; ++i)
        paired.push_back(std::make_pair(dists[i], label[i]));

    std::stable_sort(paired.begin(), paired.end(),
        [](const std::pair<float, int>& lhs, const std::pair<float, int>& rhs)
        {
            // Ordering NaN last keeps the comparator a strict weak ordering,
            // which std::stable_sort requires.
            if (std::isnan(lhs.first))
                return false;
            if (std::isnan(rhs.first))
                return true;
            return lhs.first < rhs.first;
        });

    for (std::size_t i = 0; i < n; ++i)
    {
        dists[i] = paired[i].first;
        label[i] = paired[i].second;
    }
}
// Classifies `testData` by majority vote among its `k` nearest training rows.
//
// trainData / label : training rows and their labels (label[i] belongs to
//                     trainData[i]); neither is modified.
// testData          : the sample to classify.
// count             : number of training rows to consider; clamped to the
//                     sizes of trainData and label.
//
// Returns the label (1..categories) with the most votes; on a tie the lowest
// label wins. Returns 0 when there is no usable training row or none of the
// nearest neighbours carries a label in 1..categories.
//
// Side effect: prints the distance list twice through showDist (before and
// after sorting).
int RunKnn(vector<Tuple>& trainData, vector<int>& label, Tuple& testData, int count)
{
    // Never read past what the containers actually hold.
    const int available = std::min(
        count, static_cast<int>(std::min(trainData.size(), label.size())));
    if (available <= 0)
        return 0;

    // Compute the distance from the query to every training row.
    Tuple dists(available, 0);
    for (int i = 0; i < available; i++)
        dists[i] = getDist(trainData[i], testData);
    showDist(dists, available);

    // Sort the distances. A copy of the labels is sorted alongside them:
    // permuting the caller's vector would leave it out of step with trainData.
    std::vector<int> sortedLabels(label.begin(), label.begin() + available);
    sortDist(dists, sortedLabels, available);
    showDist(dists, available);

    // Count the votes of the nearest neighbours (at most k, fewer if there
    // are not that many rows).
    std::vector<int> votes(categories, 0);
    const int neighbours = std::min(k, available);
    for (int i = 0; i < neighbours; i++)
    {
        const int candidate = sortedLabels[i];
        if (candidate >= 1 && candidate <= categories)  // ignore labels that have no vote slot
            ++votes[candidate - 1];
    }

    // Pick the label with the most votes. This loop runs over the categories,
    // not over k: the vote table has only `categories` slots.
    int bestVotes = 0;
    int testLabel = 0;
    for (int c = 0; c < categories; c++)
    {
        if (votes[c] > bestVotes)
        {
            bestVotes = votes[c];
            testLabel = c + 1;
        }
    }
    return testLabel;
}
// Writes the first `count` values of `dists` to standard output on a single
// line, each value followed by one space, then ends the line and flushes.
void showDist(Tuple& dists, int count)
{
    int pos = 0;
    while (pos < count)
    {
        std::cout << dists[pos] << " ";
        ++pos;
    }
    std::cout << std::endl;
}
// Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。