当前位置:   article > 正文

哈夫曼编码译码器_haffman 编码及译码 java

haffman 编码及译码 java

要求如下

 在这里我们看到,要求要进行编码和译码,并且要能计算压缩率,还要有文件读取的操作,因此我们先进行文件读取操作的学习


前置知识

 1.fopen-打开一个文件,在这里踩了个坑是,在把文件路径复制到代码中时,需要在\后面再加上一个\,因为需要进行转义要把特殊字符\转换成普通字符,例子如下

  1. #include <stdio.h>
  2. int main() {
  3. printf("\\"); // 输出反斜杠字符 "\"
  4. return 0;
  5. }

输出结果是\

2.fscanf 从文件中读入

3.fprintf 向文件输入

接下来我们就要开始编写程序了


首先我们要先定义一些基本的东西

  1. struct character
  2. {
  3. char ch;
  4. int number = 0;
  5. }; //文件中出现的字符和其出现次数
  6. struct huffmannode
  7. {
  8. int weight;
  9. int parent;
  10. int left;
  11. int right;
  12. char value;
  13. int i;
  14. }; //哈夫曼节点
  15. struct huffmancode
  16. {
  17. int bit[MAXBIT];
  18. int front;
  19. }; //记录每个字符哈夫曼编码

最开始我们要计算不同字符在文本中出现的频率,在这里统一说明,我们的功能应该在函数中分开实现,代码如下

  1. void tongji(character chars[], int& n) //统计原文本中出现的字符及其使用频率
  2. {
  3. FILE* file = NULL;
  4. if ((file = fopen("C:\\huff\\word.txt", "r")) == NULL)
  5. {
  6. printf("打开word.txt失败!\n");
  7. exit(0);
  8. }//如果没有打开,就输出失败,然后直接结束程序-后路
  9. char ch;
  10. int i = 0, j = 0, judge = 0;;
  11. fscanf(file, "%c", &ch);
  12. chars[i].ch = ch;
  13. chars[i].number++;
  14. i++;
  15. while (ch != '#') //从文件读入文本,以#标记结束
  16. {
  17. fscanf(file, "%c", &ch);
  18. judge = 0;
  19. if (ch != '#')
  20. {
  21. for (j = 0; j < i; j++)
  22. {
  23. if (ch == chars[j].ch) //如果读入的字符已被记录,则将出现次数+1
  24. {
  25. chars[j].number++;
  26. judge = 1;
  27. break;
  28. }
  29. }
  30. if (judge == 0)
  31. {
  32. chars[i].ch = ch;
  33. chars[i].number++;
  34. i++;
  35. }//如果没有出现在已知,那么就创建新的
  36. }
  37. }
  38. n = i;//记录文件中出现的字母数目
  39. fclose(file);
  40. }

在这里借鉴了一下别人的读取思路,总之就是将读入的字符记录,如果新读入的字符不在原本的字符样本里面,就把这个新加进去,思路还是十分明确的,然后我们要实现计算字符频率的事情,代码如下

  1. void printout(character ch[], int n)
  2. {
  3. int total = 0;
  4. for (int i = 0; i < n; i++)
  5. {
  6. total = total + ch[i].number;
  7. }
  8. for (int i = 0; i < n; i++)
  9. {
  10. printf("字符%c的频率是%f\n", ch[i].ch,(1.0*ch[i].number) / total);
  11. }
  12. }

 就把每个字符出现的频率加和然后除去自身就好了


接下来我们要实现计算哈夫曼树的构造和编码,相信你已经学过什么是哈夫曼树了,在这里我的程序没有进一步用堆优化,而是直接算出来最小的两个节点,代码如下

  1. void createhuffmantree(huffmannode huffnode[], int n, character chars[]) //构建哈夫曼树
  2. {
  3. int i, j, x1, x2;
  4. int maxweight = 50000;
  5. for (i = 0; i < 2 * n - 1; i++)
  6. {
  7. huffnode[i].weight = chars[i].number;
  8. huffnode[i].parent = TOP;
  9. huffnode[i].left = null;
  10. huffnode[i].right = null;
  11. huffnode[i].value = chars[i].ch;
  12. }//初始化,2*n-1的原因是合并n-1次会出现n-1个节点
  13. for (i = 0; i < n - 1; i++)//有n个节点就要进行n-1次合并
  14. {
  15. maxweight = MAXWEIGHT;
  16. x1 = x2 = 0;
  17. for (j = 0; j < n + i; j++)
  18. {
  19. if (huffnode[j].parent == -1 && huffnode[j].weight < maxweight)
  20. {
  21. maxweight = huffnode[j].weight;
  22. x1 = j;
  23. }
  24. }//找到第一小节点
  25. maxweight = MAXWEIGHT;
  26. for (j = 0; j < n + i; j++)
  27. {
  28. if (huffnode[j].parent == -1 && huffnode[j].weight < maxweight && j != x1)
  29. {
  30. maxweight = huffnode[j].weight;
  31. x2 = j;
  32. }
  33. }//找到第二小节点,多了一个判定条件
  34. huffnode[x1].parent = n + i;
  35. huffnode[x2].parent = n + i;//将新节点和最小两个节点连接
  36. huffnode[n + i].weight = huffnode[x1].weight + huffnode[x2].weight;
  37. huffnode[n + i].left = x1;
  38. huffnode[n + i].right = x2;
  39. }
  40. }

十分常规的写法,接下来就是计算每个字符的哈夫曼编码,一般都是从叶节点从下向上算,我们用一个循环来做,我们知道,树的根节点是不计入计算的,所以,循环终止的条件就是抵达根节点,代码如下

  1. void Coding(huffmannode huffnode[], huffmancode huffcode[], int n) //构建字符的哈夫曼编码表
  2. {
  3. int father;
  4. int now, i;
  5. huffmancode temp;
  6. int p;
  7. for (i = 0; i < n; i++)
  8. {
  9. huffcode[i].front = MAXBIT - 1;
  10. now = i;
  11. father = huffnode[i].parent;
  12. while (father != TOP)//一直到根节点,根节点不存储编码
  13. {
  14. if (now == huffnode[father].left) {
  15. huffcode[i].bit[huffcode[i].front] = 0;//左子树存储0
  16. }
  17. else {
  18. huffcode[i].bit[huffcode[i].front] = 1;//右子树存储1
  19. }
  20. now = father;
  21. father = huffnode[father].parent;
  22. huffcode[i].front--;
  23. }
  24. ++huffcode[i].front;//多减了一位,一定要有自加一次
  25. }
  26. }

接下来就是把文件进行编码了,有了上面的函数,这一步是十分轻松的,代码如下

  1. void CodingFile(huffmannode huffnode[], huffmancode huffcode[], int n, int& chnum) //对原文本编码并将编码写入文件
  2. {
  3. FILE* file1 = NULL;
  4. FILE* file2 = NULL;
  5. if ((file1 = fopen("C:\\huff\\word.txt", "r")) == NULL)
  6. {
  7. printf("打开word.txt失败!\n");
  8. exit(0);
  9. }
  10. if ((file2 = fopen("C:\\huff\\asc.txt", "w")) == NULL)
  11. {
  12. printf("打开asc.txt失败!\n");
  13. exit(0);
  14. }
  15. char ch;
  16. fscanf(file1, "%c", &ch);
  17. if (ch != '#')
  18. {
  19. for (int i = 0; i < n; i++)
  20. {
  21. if (huffnode[i].value == ch)
  22. {
  23. for (int j = huffcode[i].front; j < MAXBIT; j++)
  24. {
  25. fprintf(file2, "%d", huffcode[i].bit[j]);
  26. chnum++;
  27. }
  28. }
  29. }
  30. }
  31. while (ch != '#') {
  32. fscanf(file1, "%c", &ch);
  33. if (ch != '#')
  34. {
  35. for (int i = 0; i < n; i++)
  36. {
  37. if (huffnode[i].value == ch)
  38. {
  39. for (int j = huffcode[i].front; j < MAXBIT; j++)
  40. {
  41. fprintf(file2, "%d", huffcode[i].bit[j]);
  42. chnum++;
  43. }
  44. }
  45. }
  46. }
  47. };
  48. fclose(file1);
  49. fclose(file2);
  50. }

解码代码如下

  1. void Decoding(huffmannode huffnode[], int n)
  2. {
  3. FILE* fc = NULL, * fd = NULL;
  4. if ((fc = fopen("C:\\huff\\asc.txt", "r")) == NULL)
  5. {
  6. printf("打开asc.txt失败!\n");
  7. exit(0);
  8. }
  9. if ((fd = fopen("C:\\huff\\outcome.txt", "w")) == NULL)
  10. {
  11. printf("打开outcome.txt失败!\n");
  12. exit(0);
  13. }
  14. char nums[100000];
  15. fscanf(fc, "%s", nums);//因为译码文件没有空格
  16. int i = 0, j = 0, temp;
  17. for (i = 0; nums[i] != '\0'; i++);
  18. while (j < i)
  19. {
  20. temp = 2 * n - 2;//找到根节点
  21. while (huffnode[temp].left != null && huffnode[temp].right != null)
  22. {
  23. if (nums[j] == '0')
  24. temp = huffnode[temp].left;
  25. else
  26. temp = huffnode[temp].right;
  27. j++;
  28. }
  29. fprintf(fd, "%c", huffnode[temp].value);
  30. }
  31. fclose(fc);
  32. fclose(fd);
  33. }

计算压缩率

  1. void Calculate_yasuo(character ch[], int n, int chnum) //计算压缩率
  2. {
  3. int total = 0;
  4. for (int i = 0; i < n; i++)
  5. {
  6. total =total+ ch[i].number;
  7. }
  8. printf("=>压缩成功,压缩比%f%%", 1.0*chnum /(8*total * 100));//除8是因为在txt文档中,一个字符占8位
  9. }

在这里加上两个%%是因为要进行转义


源代码如下

  1. #include<iostream>
  2. #include<stdio.h>
  3. #include<stdlib.h>
  4. using namespace std;
  5. #define MAXWEIGHT 10000
  6. #define MAXBIT 100
  7. #define TOP -1
  8. #define null -1
  9. struct character
  10. {
  11. char ch;
  12. int number = 0;
  13. }; //文件中出现的字符和其出现次数
  14. struct huffmannode
  15. {
  16. int weight;
  17. int parent;
  18. int left;
  19. int right;
  20. char value;
  21. int i;
  22. }; //哈夫曼节点
  23. struct huffmancode
  24. {
  25. int bit[MAXBIT];
  26. int front;
  27. }; //记录每个字符哈夫曼编码
  28. void tongji(character chars[], int& n) //统计原文本中出现的字符及其使用频率
  29. {
  30. FILE* file = NULL;
  31. if ((file = fopen("C:\\huff\\word.txt", "r")) == NULL)
  32. {
  33. printf("打开word.txt失败!\n");
  34. exit(0);
  35. }//如果没有打开,就输出失败,然后直接结束程序-后路
  36. char ch;
  37. int i = 0, j = 0, judge = 0;;
  38. fscanf(file, "%c", &ch);
  39. chars[i].ch = ch;
  40. chars[i].number++;
  41. i++;
  42. while (ch != '#') //从文件读入文本,以#标记结束
  43. {
  44. fscanf(file, "%c", &ch);
  45. judge = 0;
  46. if (ch != '#')
  47. {
  48. for (j = 0; j < i; j++)
  49. {
  50. if (ch == chars[j].ch) //如果读入的字符已被记录,则将出现次数+1
  51. {
  52. chars[j].number++;
  53. judge = 1;
  54. break;
  55. }
  56. }
  57. if (judge == 0)
  58. {
  59. chars[i].ch = ch;
  60. chars[i].number++;
  61. i++;
  62. }//如果没有出现在已知,那么就创建新的
  63. }
  64. }
  65. n = i;//记录文件中出现的字母数目
  66. fclose(file);
  67. }
  68. void createhuffmantree(huffmannode huffnode[], int n, character chars[]) //构建哈夫曼树
  69. {
  70. int i, j, x1, x2;
  71. int maxweight = 50000;
  72. for (i = 0; i < 2 * n - 1; i++)
  73. {
  74. huffnode[i].weight = chars[i].number;
  75. huffnode[i].parent = TOP;
  76. huffnode[i].left = null;
  77. huffnode[i].right = null;
  78. huffnode[i].value = chars[i].ch;
  79. }//初始化,2*n-1的原因是合并n-1次会出现n-1个节点
  80. for (i = 0; i < n - 1; i++)//有n个节点就要进行n-1次合并
  81. {
  82. maxweight = MAXWEIGHT;
  83. x1 = x2 = 0;
  84. for (j = 0; j < n + i; j++)
  85. {
  86. if (huffnode[j].parent == -1 && huffnode[j].weight < maxweight)
  87. {
  88. maxweight = huffnode[j].weight;
  89. x1 = j;
  90. }
  91. }//找到第一小节点
  92. maxweight = MAXWEIGHT;
  93. for (j = 0; j < n + i; j++)
  94. {
  95. if (huffnode[j].parent == -1 && huffnode[j].weight < maxweight && j != x1)
  96. {
  97. maxweight = huffnode[j].weight;
  98. x2 = j;
  99. }
  100. }//找到第二小节点,多了一个判定条件
  101. huffnode[x1].parent = n + i;
  102. huffnode[x2].parent = n + i;//将新节点和最小两个节点连接
  103. huffnode[n + i].weight = huffnode[x1].weight + huffnode[x2].weight;
  104. huffnode[n + i].left = x1;
  105. huffnode[n + i].right = x2;
  106. }
  107. }
  108. void Coding(huffmannode huffnode[], huffmancode huffcode[], int n) //构建字符的哈夫曼编码表
  109. {
  110. int father;
  111. int now, i;
  112. huffmancode temp;
  113. int p;
  114. for (i = 0; i < n; i++)
  115. {
  116. huffcode[i].front = MAXBIT - 1;
  117. now = i;
  118. father = huffnode[i].parent;
  119. while (father != TOP)//一直到根节点,根节点不存储编码
  120. {
  121. if (now == huffnode[father].left) {
  122. huffcode[i].bit[huffcode[i].front] = 0;//左子树存储0
  123. }
  124. else {
  125. huffcode[i].bit[huffcode[i].front] = 1;//右子树存储1
  126. }
  127. now = father;
  128. father = huffnode[father].parent;
  129. huffcode[i].front--;
  130. }
  131. ++huffcode[i].front;//多减了一位,一定要有自加一次
  132. }
  133. }
  134. void CodingFile(huffmannode huffnode[], huffmancode huffcode[], int n, int& chnum) //对原文本编码并将编码写入文件
  135. {
  136. FILE* file1 = NULL;
  137. FILE* file2 = NULL;
  138. if ((file1 = fopen("C:\\huff\\word.txt", "r")) == NULL)
  139. {
  140. printf("打开word.txt失败!\n");
  141. exit(0);
  142. }
  143. if ((file2 = fopen("C:\\huff\\asc.txt", "w")) == NULL)
  144. {
  145. printf("打开asc.txt失败!\n");
  146. exit(0);
  147. }
  148. char ch;
  149. fscanf(file1, "%c", &ch);
  150. if (ch != '#')
  151. {
  152. for (int i = 0; i < n; i++)
  153. {
  154. if (huffnode[i].value == ch)
  155. {
  156. for (int j = huffcode[i].front; j < MAXBIT; j++)
  157. {
  158. fprintf(file2, "%d", huffcode[i].bit[j]);
  159. chnum++;
  160. }
  161. }
  162. }
  163. }
  164. while (ch != '#') {
  165. fscanf(file1, "%c", &ch);
  166. if (ch != '#')
  167. {
  168. for (int i = 0; i < n; i++)
  169. {
  170. if (huffnode[i].value == ch)
  171. {
  172. for (int j = huffcode[i].front; j < MAXBIT; j++)
  173. {
  174. fprintf(file2, "%d", huffcode[i].bit[j]);
  175. chnum++;
  176. }
  177. }
  178. }
  179. }
  180. };
  181. fclose(file1);
  182. fclose(file2);
  183. }
  184. void Decoding(huffmannode huffnode[], int n)
  185. {
  186. FILE* fc = NULL, * fd = NULL;
  187. if ((fc = fopen("C:\\huff\\asc.txt", "r")) == NULL)
  188. {
  189. printf("打开asc.txt失败!\n");
  190. exit(0);
  191. }
  192. if ((fd = fopen("C:\\huff\\outcome.txt", "w")) == NULL)
  193. {
  194. printf("打开outcome.txt失败!\n");
  195. exit(0);
  196. }
  197. char nums[100000];
  198. fscanf(fc, "%s", nums);//因为译码文件没有空格
  199. int i = 0, j = 0, temp;
  200. for (i = 0; nums[i] != '\0'; i++);
  201. while (j < i)
  202. {
  203. temp = 2 * n - 2;//找到根节点
  204. while (huffnode[temp].left != null && huffnode[temp].right != null)
  205. {
  206. if (nums[j] == '0')
  207. temp = huffnode[temp].left;
  208. else
  209. temp = huffnode[temp].right;
  210. j++;
  211. }
  212. fprintf(fd, "%c", huffnode[temp].value);
  213. }
  214. fclose(fc);
  215. fclose(fd);
  216. }
  217. void printout(character ch[], int n)
  218. {
  219. int total = 0;
  220. for (int i = 0; i < n; i++)
  221. {
  222. total = total + ch[i].number;
  223. }
  224. for (int i = 0; i < n; i++)
  225. {
  226. printf("字符%c的频率是%f\n", ch[i].ch,(1.0*ch[i].number) / total);
  227. }
  228. }
  229. void Calculate_yasuo(character ch[], int n, int chnum) //计算压缩率
  230. {
  231. int total = 0;
  232. for (int i = 0; i < n; i++)
  233. {
  234. total =total+ ch[i].number;
  235. }
  236. printf("=>压缩成功,压缩比%f%%", 1.0*chnum /(8*total * 100));//除8是因为在txt文档中,一个字符占8位
  237. }
  238. int main()
  239. {
  240. huffmannode huffnode[500];
  241. character chars[500];
  242. huffmancode huffcode[500];
  243. int n = 0, chnum = 0;
  244. cout << "********请选择:1 压缩 2 解压 ********|" << endl;
  245. cout << "!!!!!注意!!!!! |" << endl;
  246. cout << "****系统只支持编码或者解压分开进行****|" << endl;
  247. cout << "************输入0退出系统*************|" << endl;
  248. cout << "---------------------------------------" << endl;
  249. int x; cin >> x;
  250. while (x != 0) {
  251. if (x == 1) {
  252. cout << "系统将自动压缩你保存在本地的word.txt文件" << endl;
  253. tongji(chars, n);
  254. printout(chars, n);
  255. createhuffmantree(huffnode, n, chars);
  256. Coding(huffnode, huffcode, n);
  257. CodingFile(huffnode, huffcode, n, chnum);
  258. Calculate_yasuo(chars, n, chnum);
  259. cout << "------------------------------------------" << endl;
  260. cout << "********请选择:1 压缩 2 解压 ********|" << endl;
  261. cout << "!!!!!注意!!!!! |" << endl;
  262. cout << "****系统只支持编码或者解压分开进行****|" << endl;
  263. cout << "************输入0退出系统*************|" << endl;
  264. cout << "---------------------------------------" << endl;
  265. }
  266. else {
  267. cout << "系统将自动为您解压您保存在本地的asc.txt文件" << endl;
  268. tongji(chars, n);
  269. Decoding(huffnode, n);
  270. cout << "=>解压成功,内容已保存在本地outcome.txt文件" << endl;
  271. cout << "------------------------------------------" << endl;
  272. cout << "********请选择:1 压缩 2 解压 ********|" << endl;
  273. cout << "!!!!!注意!!!!! |" << endl;
  274. cout << "****系统只支持编码或者解压分开进行****|" << endl;
  275. cout << "************输入0退出系统*************|" << endl;
  276. cout << "---------------------------------------" << endl;
  277. }
  278. cin >> x;
  279. if (x == 0) {
  280. cout << "您已成功退出系统!";
  281. exit(0);
  282. }
  283. }//进行循环,保证能编码和译码都能进行
  284. return 0;
  285. }

声明:本文内容由网友自发贡献,不代表【wpsshop博客】立场,版权归原作者所有,本站不承担相应法律责任。如您发现有侵权的内容,请联系我们。转载请注明出处:https://www.wpsshop.cn/w/笔触狂放9/article/detail/859220
推荐阅读
相关标签
  

闽ICP备14008679号