当前位置:   article > 正文

C++实现基于KNN的手写体识别

C++实现基于KNN的手写体识别

一、系统结构

在这里插入图片描述
在这里插入图片描述
在这里插入图片描述

二、数据获取与预处理

在这里插入图片描述
在这里插入图片描述

三、KNN算法与K折交叉验证

在这里插入图片描述
在这里插入图片描述
源码:

#include "pch.h"
#include <iostream>
#include <fstream>
#include <string>
#include <math.h>
using namespace std;
//#define k 10   // key KNN parameter (disabled; the global variable k below is used instead)
#define mn 10
#define K_flod 15  // number of folds used by the K-fold cross-validation
// Number of nearest neighbours counted by K_select; main assigns it before every run.
int k;
double train_weigh[8] = {1,1,1,1,1,1,1,1};// per-character weights multiplied into the neighbour votes
// One handwriting sample together with the bookkeeping fields used while classifying it.
struct node {
 std::string nn, name, shuxing, sbname;
 // nn:      the 0/1 matrix read from one txt file, stored as a single string
 // name:    class label of the sample
 // shuxing: class name finally decided for a test sample
 // sbname:  recognition result written by K_select
 double dis, x1, x2, diss;
 bool s;
 // dis:  Euclidean distance between a test sample and a training sample
 // diss: Manhattan distance between a test sample and a training sample
 // x1:   share of the k distances that were recognised as the current test character
 // x2:   share of the k distances that were recognised as some other character
 // s:    set to true by k_K_flod for samples belonging to the current test fold

 // Every field gets a defined start value. "000" is the placeholder for an
 // unset string; diss is now zeroed as well (it used to be left uninitialised).
 node()
  : nn("000"), name("000"), shuxing("000"), sbname("000"),
    dis(0.0), x1(0.0), x2(0.0), diss(0.0),
    s(false) {
 }
};
node dis_o[720];// distance table (Euclidean or Manhattan) that K_select sorts
node ceshi[720];// test set examined by ceshiFenxi
double tru[720];// accuracy of each individual validation round of the K-fold cross-validation
double err[720];// error rate of each individual validation round of the K-fold cross-validation
double K_tru[8];// final accuracy of every character for the current k, written by k_K_flod
//double K_err[8];// final error rate of every character for the current k
string typp[8] = { "bei","jing","xin","xi","ke","ji","da","xue" };// the character classes that can be recognised
node a[720];// the complete data set
// Load the complete data set into the global array a: 8 characters x 90 files,
// stored class by class in the order bei, jing, xin, xi, ke, ji, da, xue.
// Each file <label>_<NN>.txt (NN = 01..90) is read token by token and the
// tokens are concatenated into a[j].nn; a[j].name receives the class label.
// (The original note describes the files as 128*128 binarised matrices; the
// distance functions only look at the first 1024 characters.)
void read() {
 static const char *const labels[8] = { "bei", "jing", "xin", "xi", "ke", "ji", "da", "xue" };
 const std::string dir = "F:\\计算机综合实践\\手写体识别数据集\\训练集二值化数据集\\";
 int j = 0;
 for (int c = 0; c < 8; c++) {
  for (int i = 1; i <= 90; i++, j++) {
   // File numbers are always two digits wide: 01, 02, ..., 90.
   const std::string number = (i < 10 ? "0" : "") + std::to_string(i);
   const std::string ader = dir + labels[c] + "_" + number + ".txt";
   // Drop the "000" placeholder set by the node constructor so the pixel
   // string starts at the first pixel (and a second call does not append twice).
   a[j].nn.clear();
   a[j].name = labels[c];
   std::ifstream rf(ader);
   if (!rf) {
    // Report the missing file instead of silently keeping an empty sample.
    std::cerr << "read: cannot open " << ader << std::endl;
    continue;
   }
   std::string token;
   while (rf >> token) {
    a[j].nn += token;
   }
  }
 }
}
// Prompt for the eight per-character weights and read them, in class order,
// from standard input into train_weigh.
void InitTrWei() {
 cout << "请分别输入8个汉字的权重值: ";
 for (int idx = 0; idx < 8; ++idx) {
  cin >> train_weigh[idx];
 }
}
//计算欧式距离
double dis (node x1,node x2) {
 const char *ar1,*ar2;
 double sum = 0;
 int j;
 ar1 = x1.nn.c_str();
 ar2 = x2.nn.c_str();
 for (j = 0; j < 1024; j++) {
  sum += (ar1[j] - ar2[j])*(ar1[j] - ar2[j]);
 }
 return sqrt(sum);
}
//计算曼哈顿距离
double diss(node x1, node x2) {
 const char *ar1, *ar2;
 double sum = 0;
 int j;
 ar1 = x1.nn.c_str();
 ar2 = x2.nn.c_str();
 for (j = 0; j < 1024; j++) {
  sum += fabs(ar1[j] - ar2[j]);
 }
 return sum;
}
// Quick sort, partition step: uses a[low] as the pivot and rearranges
// a[low..high] so that entries with a smaller dis end up left of the pivot and
// entries with a larger dis end up right of it. Only the dis and name fields
// are moved; all other fields of the nodes stay where they were.
// Returns the final index of the pivot.
int Par(node *a,int low,int high) {
 // Remember the pivot's key and label; its slot becomes the first "hole".
 const double pivotDis = a[low].dis;
 const std::string pivotName = a[low].name;
 int left = low;
 int right = high;
 while (left < right) {
  // Scan from the right for an entry smaller than the pivot and drop it into the left hole.
  for (; left < right && a[right].dis >= pivotDis; --right) {
  }
  a[left].name = a[right].name;
  a[left].dis = a[right].dis;
  // Scan from the left for an entry larger than the pivot and drop it into the right hole.
  for (; left < right && a[left].dis <= pivotDis; ++left) {
  }
  a[right].dis = a[left].dis;
  a[right].name = a[left].name;
 }
 // Both cursors met: this is where the pivot belongs.
 a[left].dis = pivotDis;
 a[left].name = pivotName;
 return left;
}
// Recursive quick sort of a[low..high] in ascending order of dis, built on Par.
void Qsort(node *a,int low ,int high) {
 // Ranges holding fewer than two entries are already sorted.
 if (low >= high) {
  return;
 }
 const int pivotAt = Par(a, low, high);
 Qsort(a, low, pivotAt - 1);
 Qsort(a, pivotAt + 1, high);
}
// Decide the k-nearest-neighbour result for one test character.
// Sorts the first (90 - 90 / K_flod) * 8 entries of dis_o by distance, counts
// the class labels of the k closest entries, multiplies each count by its
// train_weigh factor and stores the winning label in p.sbname. When several
// classes tie, or no class scores above zero, the earliest class in the table wins.
void K_select(node& p) {
 static const char *const labels[8] = { "bei", "jing", "xin", "xi", "ke", "ji", "da", "xue" };
 double votes[8] = { 0 };
 Qsort(dis_o, 0, (90 - 90 / K_flod)*8-1);
 // One vote per neighbour for the class whose label it carries.
 for (int n = 0; n < k; ++n) {
  for (int c = 0; c < 8; ++c) {
   if (dis_o[n].name == labels[c]) {
    votes[c]++;
    break;  // labels are distinct, no other class can match
   }
  }
 }
 // Apply the per-class weights.
 for (int c = 0; c < 8; ++c) {
  votes[c] *= train_weigh[c];
 }
 // Pick the class with the strictly largest weighted vote.
 int best = 0;
 double top = 0;
 for (int c = 0; c < 8; ++c) {
  if (votes[c] > top) {
   top = votes[c];
   best = c;
  }
 }
 p.sbname = labels[best];
}
// Evaluate the test set: returns the fraction of the first (90/K_flod)*8
// entries of ceshi whose recognised label (sbname) equals their true label (name).
double ceshiFenxi() {
 const int total = (90/K_flod)*8;
 int hits = 0;
 for (int n = 0; n < total; ++n) {
  if (ceshi[n].name == ceshi[n].sbname) {
   ++hits;
  }
 }
 return static_cast<double>(hits) / total;
}
//K折交叉验证
void k_K_flod(){
 int x1 = 0;//记录交叉验证第几折
 int i,j,t,jj,e;
 double b[8] = { 0 };
 for (i = 0; i < 720; i++) {
  a[i].s = false;
 }
 for (x1 = 0; x1 < K_flod;x1++) {
  t = 0;
  //选出第x1折交叉验证的测试集
  for (e = 0; e < 8; e++) {
   for (j = 90 - (K_flod - x1)*(90 / K_flod)+e*90; j < 90 - (K_flod - x1)*(90 / K_flod) + 90 / K_flod+e*90; j++) {
    a[j].s = true;
   }
  }
  for (e = 0; e < 8; e++) {
   double tru = 0, err = 0;
   //计算第x1折交叉验证的每个字的测试集准确率
   for (i = 90 - (K_flod - x1)*(90 / K_flod) + e * 90; i < 90 - (K_flod - x1)*(90 / K_flod) + 90 / K_flod + e * 90; i++) {
    if (a[i].name == a[i].sbname)
     tru++;
    else
     err++;
   }
   b[e] += tru / (tru + err);
   }
  for (i = 0; i < 720; i++) {
   a[i].s = false;
  }
 }
 for (i = 0; i < 8; i++) {
  K_tru[i] = b[i] / K_flod;
 }
}
// Program entry: loads the data set, then for every k from 5 to 40 runs the
// K-fold cross-validation and writes the eight per-character accuracies to
// both the console and the results file, one value per line with two blank
// lines after each k.
int main() {
 read();// load the complete data set
 std::string ader = "F:\\计算机综合实践\\15折交交叉验证_曼哈顿距离.txt";
 std::ofstream rf(ader);
 if (!rf) {
  // Keep running so the console output is still produced, but say why the file stays empty.
  std::cerr << "main: cannot open " << ader << " for writing; results are printed to the console only" << std::endl;
 }
 for (int j = 5; j <= 40; j++) {
  k = j;
  k_K_flod();// cross-validate with this k; the per-character accuracies land in K_tru
  for (int i = 0; i < 8; i++) {
   std::cout << K_tru[i] << std::endl;
   rf << K_tru[i] << std::endl;
  }
  rf << std::endl << std::endl;
  std::cout << std::endl << std::endl;
 }
 return 0;
}
  • 1
  • 2
  • 3
  • 4
  • 5
  • 6
  • 7
  • 8
  • 9
  • 10
  • 11
  • 12
  • 13
  • 14
  • 15
  • 16
  • 17
  • 18
  • 19
  • 20
  • 21
  • 22
  • 23
  • 24
  • 25
  • 26
  • 27
  • 28
  • 29
  • 30
  • 31
  • 32
  • 33
  • 34
  • 35
  • 36
  • 37
  • 38
  • 39
  • 40
  • 41
  • 42
  • 43
  • 44
  • 45
  • 46
  • 47
  • 48
  • 49
  • 50
  • 51
  • 52
  • 53
  • 54
  • 55
  • 56
  • 57
  • 58
  • 59
  • 60
  • 61
  • 62
  • 63
  • 64
  • 65
  • 66
  • 67
  • 68
  • 69
  • 70
  • 71
  • 72
  • 73
  • 74
  • 75
  • 76
  • 77
  • 78
  • 79
  • 80
  • 81
  • 82
  • 83
  • 84
  • 85
  • 86
  • 87
  • 88
  • 89
  • 90
  • 91
  • 92
  • 93
  • 94
  • 95
  • 96
  • 97
  • 98
  • 99
  • 100
  • 101
  • 102
  • 103
  • 104
  • 105
  • 106
  • 107
  • 108
  • 109
  • 110
  • 111
  • 112
  • 113
  • 114
  • 115
  • 116
  • 117
  • 118
  • 119
  • 120
  • 121
  • 122
  • 123
  • 124
  • 125
  • 126
  • 127
  • 128
  • 129
  • 130
  • 131
  • 132
  • 133
  • 134
  • 135
  • 136
  • 137
  • 138
  • 139
  • 140
  • 141
  • 142
  • 143
  • 144
  • 145
  • 146
  • 147
  • 148
  • 149
  • 150
  • 151
  • 152
  • 153
  • 154
  • 155
  • 156
  • 157
  • 158
  • 159
  • 160
  • 161
  • 162
  • 163
  • 164
  • 165
  • 166
  • 167
  • 168
  • 169
  • 170
  • 171
  • 172
  • 173
  • 174
  • 175
  • 176
  • 177
  • 178
  • 179
  • 180
  • 181
  • 182
  • 183
  • 184
  • 185
  • 186
  • 187
  • 188
  • 189
  • 190
  • 191
  • 192
  • 193
  • 194
  • 195
  • 196
  • 197
  • 198
  • 199
  • 200
  • 201
  • 202
  • 203
  • 204
  • 205
  • 206
  • 207
  • 208
  • 209
  • 210
  • 211
  • 212
  • 213
  • 214
  • 215
  • 216
  • 217
  • 218
  • 219
  • 220
  • 221
  • 222
  • 223
  • 224
  • 225
  • 226
  • 227
  • 228
  • 229
  • 230
  • 231
  • 232
  • 233
  • 234
  • 235
  • 236
  • 237
  • 238
  • 239
  • 240
  • 241
  • 242
  • 243
  • 244
  • 245
  • 246
  • 247
  • 248
  • 249
  • 250
  • 251
  • 252
  • 253
  • 254
  • 255
  • 256
  • 257
  • 258
  • 259
  • 260
  • 261
  • 262
  • 263
  • 264
  • 265
  • 266
  • 267
  • 268
  • 269
  • 270
  • 271
  • 272
  • 273
  • 274
  • 275
  • 276
  • 277
  • 278
  • 279
  • 280
  • 281
  • 282
  • 283
  • 284
  • 285
  • 286
  • 287
  • 288
  • 289
  • 290
  • 291
声明:本文内容由网友自发贡献,不代表【wpsshop博客】立场,版权归原作者所有,本站不承担相应法律责任。如您发现有侵权的内容,请联系我们。转载请注明出处:https://www.wpsshop.cn/w/繁依Fanyi0/article/detail/349505
推荐阅读
相关标签
  

闽ICP备14008679号