赞
踩
源:
#include "pch.h"
#include <algorithm>
#include <fstream>
#include <iostream>
#include <string>
#include <math.h>
using namespace std;

// #define k 10   // key KNN parameter; superseded by the runtime global `k` below
#define mn 10
#define K_flod 15 // number of folds used by the K-fold cross-validation

// Layout of the data set: 8 character classes, 90 samples per class, stored
// class after class in `a` (class c occupies a[c*90] .. a[c*90 + 89]).
static const int CLASS_COUNT = 8;
static const int PER_CLASS = 90;
static const int SAMPLE_COUNT = CLASS_COUNT * PER_CLASS; // 720

int k; // number of nearest neighbours that vote in K_select()

// Per-class vote weights, applied to the neighbour counts in K_select().
double train_weigh[CLASS_COUNT] = { 1,1,1,1,1,1,1,1 };

struct node
{
	// nn:      the 0/1 matrix of one txt file, concatenated into one string
	// name:    class label of the sample
	// shuxing: final class decided for a test sample (not used in this file)
	// sbname:  recognition result written by K_select()
	string nn, name, shuxing, sbname;
	// dis:  distance between a test sample and a training sample; this is the
	//       key Qsort()/K_select() order by
	// diss: Manhattan distance slot (not used in this file)
	// x1:   share of the k distances attributed to the current test character
	//       (per the original author's note; not used in this file)
	// x2:   share of the k distances attributed to other characters
	//       (per the original author's note; not used in this file)
	double dis, x1, x2, diss;
	// s: true while the sample belongs to the held-out fold in k_K_flod()
	bool s;
	node()
	{
		nn = "000";
		name = "000";
		shuxing = "000";
		sbname = "000";
		dis = 0.0;
		x1 = 0.0;
		x2 = 0.0;
		diss = 0.0; // was left uninitialised
		s = false;
	}
};

node dis_o[SAMPLE_COUNT];  // candidate neighbours (label + distance) of the sample being classified
node ceshi[SAMPLE_COUNT];  // test set, read by ceshiFenxi()
double tru[SAMPLE_COUNT];  // per-run accuracy slots (not used in this file)
double err[SAMPLE_COUNT];  // per-run error slots (not used in this file)
double K_tru[CLASS_COUNT]; // mean cross-validated accuracy of each class, written by k_K_flod()
//double K_err[8];         // mean cross-validated error of each class
string typp[CLASS_COUNT] = { "bei","jing","xin","xi","ke","ji","da","xue" }; // recognisable classes
node a[SAMPLE_COUNT];      // the whole data set

// Cache of diss(a[i], a[j]) for every pair. The distances depend neither on
// the fold nor on k, so they are computed once and reused by every
// k_K_flod() call. read() invalidates the cache.
static double pairDist[SAMPLE_COUNT][SAMPLE_COUNT];
static bool pairDistReady = false;

// Loads all 720 binarised matrices into `a`, class by class in `typp` order.
// Each file is read as whitespace-separated tokens that are concatenated into
// node::nn; node::name receives the class label.
void read()
{
	const string dir = "F:\\计算机综合实践\\手写体识别数据集\\训练集二值化数据集\\";
	int j = 0;
	for (int c = 0; c < CLASS_COUNT; c++)
	{
		for (int i = 1; i <= PER_CLASS; i++, j++)
		{
			// File names carry a two-digit, zero-padded index: 01 .. 90.
			const string idx = (i < 10 ? "0" : "") + to_string(i);
			const string ader = dir + typp[c] + "_" + idx + ".txt";
			ifstream rf(ader);
			if (!rf)
			{
				// A missing file would otherwise silently become an empty
				// sample that is at distance 0 from everything.
				cerr << "warning: cannot open " << ader << '\n';
			}
			// Drop the constructor's "000" placeholder (and any earlier load)
			// so that nn holds exactly the file's matrix.
			a[j].nn.clear();
			string token;
			while (rf >> token)
			{
				a[j].nn += token;
			}
			a[j].name = typp[c];
		}
	}
	pairDistReady = false; // cached distances refer to the previous contents
}

// Reads the 8 per-class vote weights from standard input, in `typp` order.
void InitTrWei()
{
	cout << "请分别输入8个汉字的权重值: ";
	for (int i = 0; i < CLASS_COUNT; i++)
	{
		cin >> train_weigh[i];
	}
}

// Euclidean distance between the matrices of x1 and x2, compared character by
// character over the length both strings share (excess characters of the
// longer string are ignored).
double dis(const node& x1, const node& x2)
{
	const size_t len = min(x1.nn.size(), x2.nn.size());
	double sum = 0;
	for (size_t j = 0; j < len; j++)
	{
		const double d = static_cast<double>(x1.nn[j] - x2.nn[j]);
		sum += d * d;
	}
	return sqrt(sum);
}

// Manhattan distance between the matrices of x1 and x2, compared character by
// character over the length both strings share (excess characters of the
// longer string are ignored).
double diss(const node& x1, const node& x2)
{
	const size_t len = min(x1.nn.size(), x2.nn.size());
	double sum = 0;
	for (size_t j = 0; j < len; j++)
	{
		sum += fabs(static_cast<double>(x1.nn[j] - x2.nn[j]));
	}
	return sum;
}

// Quicksort partition step on a[low..high], ascending by node::dis, with
// a[low] as pivot. Only `name` and `dis` travel with an element; all other
// fields stay where they are. Returns the pivot's final index.
int Par(node *a, int low, int high)
{
	// Keep just the two fields that are moved instead of copying the whole
	// node (which would also copy the matrix string).
	const string pivotName = a[low].name;
	const double pivotDis = a[low].dis;
	while (low < high)
	{
		while (low < high && a[high].dis >= pivotDis)
			--high;
		a[low].name = a[high].name;
		a[low].dis = a[high].dis;
		while (low < high && a[low].dis <= pivotDis)
			++low;
		a[high].dis = a[low].dis;
		a[high].name = a[low].name;
	}
	a[low].dis = pivotDis;
	a[low].name = pivotName;
	return low;
}

// Recursive quicksort of a[low..high] (inclusive), ascending by node::dis.
void Qsort(node *a, int low, int high)
{
	if (low < high)
	{
		int p = Par(a, low, high);
		Qsort(a, low, p - 1);
		Qsort(a, p + 1, high);
	}
}

// Classifies sample p by a weighted k-nearest-neighbour vote.
// Expects dis_o[0 .. (90 - 90/K_flod)*8 - 1] to hold the label and distance
// of every training sample relative to p. Sorts that range, counts the labels
// of the k closest entries, multiplies each count by train_weigh and stores
// the winning label in p.sbname. The first class in `typp` order wins ties;
// if no class gets a positive score the result is typp[0].
void K_select(node& p)
{
	const int trainCount = (PER_CLASS - PER_CLASS / K_flod) * CLASS_COUNT;
	double votes[CLASS_COUNT] = { 0 };

	Qsort(dis_o, 0, trainCount - 1);

	// Never look past the candidates that were actually filled in.
	const int neighbours = k < trainCount ? k : trainCount;
	for (int i = 0; i < neighbours; i++)
	{
		for (int c = 0; c < CLASS_COUNT; c++)
		{
			if (dis_o[i].name == typp[c])
			{
				votes[c]++;
				break;
			}
		}
	}

	// Apply the per-class weights.
	for (int c = 0; c < CLASS_COUNT; c++)
	{
		votes[c] *= train_weigh[c];
	}

	double best = 0;
	int winner = 0;
	for (int c = 0; c < CLASS_COUNT; c++)
	{
		if (votes[c] > best)
		{
			best = votes[c];
			winner = c;
		}
	}
	p.sbname = typp[winner];
}

// Returns the fraction of the first (90/K_flod)*8 entries of `ceshi` whose
// recognised label (sbname) equals their true label (name); 0 when that
// range is empty.
double ceshiFenxi()
{
	const int total = (PER_CLASS / K_flod) * CLASS_COUNT;
	if (total <= 0)
		return 0.0;
	double hits = 0;
	for (int i = 0; i < total; i++)
	{
		if (ceshi[i].name == ceshi[i].sbname)
			hits++;
	}
	return hits / total;
}

// Fills pairDist with the Manhattan distance of every pair of samples in `a`
// (no-op when the cache is already valid).
static void buildPairDist()
{
	if (pairDistReady)
		return;
	for (int i = 0; i < SAMPLE_COUNT; i++)
	{
		pairDist[i][i] = 0.0;
		for (int j = i + 1; j < SAMPLE_COUNT; j++)
		{
			const double d = diss(a[i], a[j]);
			pairDist[i][j] = d;
			pairDist[j][i] = d;
		}
	}
	pairDistReady = true;
}

// K-fold cross-validation over `a` using the current global `k`.
// For every fold, 90/K_flod consecutive samples of each class are held out,
// each held-out sample is classified by K_select() against all remaining
// samples, and the per-class accuracy is accumulated. The mean accuracy of
// each class over all folds is written to K_tru.
//
// Distances are Manhattan distances (matching the output file name chosen in
// main). NOTE(review): if the files contain only '0'/'1' characters, the
// Euclidean distance is the square root of the Manhattan distance, so both
// give the same neighbour ranking.
void k_K_flod()
{
	const int foldSize = PER_CLASS / K_flod;
	double b[CLASS_COUNT] = { 0 }; // per-class sum of fold accuracies

	buildPairDist();

	for (int i = 0; i < SAMPLE_COUNT; i++)
	{
		a[i].s = false;
	}

	for (int x1 = 0; x1 < K_flod; x1++)
	{
		// First held-out index inside a class for fold x1.
		const int offset = PER_CLASS - (K_flod - x1) * foldSize;

		// Mark the held-out (test) samples of this fold.
		for (int e = 0; e < CLASS_COUNT; e++)
		{
			const int first = offset + e * PER_CLASS;
			for (int j = first; j < first + foldSize; j++)
			{
				a[j].s = true;
			}
		}

		// Classify every held-out sample and score each class.
		for (int e = 0; e < CLASS_COUNT; e++)
		{
			const int first = offset + e * PER_CLASS;
			double tru = 0, err = 0;
			for (int i = first; i < first + foldSize; i++)
			{
				// Candidate neighbours: every sample outside the held-out fold.
				// K_select() orders by the `dis` field, so the distance goes there.
				int t = 0;
				for (int j = 0; j < SAMPLE_COUNT; j++)
				{
					if (a[j].s)
						continue;
					dis_o[t].name = a[j].name;
					dis_o[t].dis = pairDist[i][j];
					t++;
				}
				K_select(a[i]);

				if (a[i].name == a[i].sbname)
					tru++;
				else
					err++;
			}
			const double total = tru + err;
			if (total > 0)
				b[e] += tru / total;
		}

		for (int i = 0; i < SAMPLE_COUNT; i++)
		{
			a[i].s = false;
		}
	}

	for (int i = 0; i < CLASS_COUNT; i++)
	{
		K_tru[i] = b[i] / K_flod;
	}
}

// Loads the data set, then runs the cross-validation for k = 5 .. 40 and
// writes the 8 per-class accuracies of every run to the console and to the
// result file, one value per line, runs separated by two blank lines.
int main()
{
	read(); // load the whole data set
	string ader = "F:\\计算机综合实践\\15折交交叉验证_曼哈顿距离.txt";
	ofstream rf(ader);
	if (!rf)
	{
		cerr << "warning: cannot open " << ader << '\n';
	}
	for (int j = 5; j <= 40; j++)
	{
		k = j;
		k_K_flod(); // cross-validate; per-class accuracies end up in K_tru
		for (int i = 0; i < CLASS_COUNT; i++)
		{
			cout << K_tru[i] << endl;
			rf << K_tru[i] << endl;
		}
		rf << endl << endl;
		cout << endl << endl;
	}
	return 0;
}
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。