赞
踩
要求如下
在这里我们看到,要求要进行编码和译码,并且要能计算压缩率,还要有文件读取的操作,因此我们先进行文件读取操作的学习
前置知识
1.fopen-打开一个文件,在这里踩了个坑是,在把文件路径复制到代码中时,需要在\后面再加上一个\,因为需要进行转义要把特殊字符\转换成普通字符,例子如下
- #include <stdio.h>
-
- int main() {
- printf("\\"); // 输出反斜杠字符 "\"
-
- return 0;
- }
输出结果是\
2.fscanf 从文件中读入
3.fprintf 向文件输入
接下来我们就要开始编写程序了
首先我们要先定义一些基本的东西
- struct character
- {
- char ch;
- int number = 0;
- }; //文件中出现的字符和其出现次数
- struct huffmannode
- {
- int weight;
- int parent;
- int left;
- int right;
- char value;
- int i;
- }; //哈夫曼节点
- struct huffmancode
- {
- int bit[MAXBIT];
- int front;
- }; //记录每个字符哈夫曼编码
最开始我们要计算不同字符在文本中出现的频率,在这里统一说明,我们的功能应该在函数中分开实现,代码如下
- void tongji(character chars[], int& n) //统计原文本中出现的字符及其使用频率
- {
- FILE* file = NULL;
- if ((file = fopen("C:\\huff\\word.txt", "r")) == NULL)
- {
- printf("打开word.txt失败!\n");
- exit(0);
- }//如果没有打开,就输出失败,然后直接结束程序-后路
- char ch;
- int i = 0, j = 0, judge = 0;;
- fscanf(file, "%c", &ch);
- chars[i].ch = ch;
- chars[i].number++;
- i++;
- while (ch != '#') //从文件读入文本,以#标记结束
- {
- fscanf(file, "%c", &ch);
- judge = 0;
- if (ch != '#')
- {
- for (j = 0; j < i; j++)
- {
- if (ch == chars[j].ch) //如果读入的字符已被记录,则将出现次数+1
- {
- chars[j].number++;
- judge = 1;
- break;
- }
- }
- if (judge == 0)
- {
- chars[i].ch = ch;
- chars[i].number++;
- i++;
- }//如果没有出现在已知,那么就创建新的
- }
- }
- n = i;//记录文件中出现的字母数目
- fclose(file);
- }
在这里借鉴了一下别人的读取思路,总之就是将读入的字符记录,如果新读入的字符不在原本的字符样本里面,就把这个新加进去,思路还是十分明确的,然后我们要实现计算字符频率的事情,代码如下
- void printout(character ch[], int n)
- {
- int total = 0;
- for (int i = 0; i < n; i++)
- {
- total = total + ch[i].number;
- }
- for (int i = 0; i < n; i++)
- {
- printf("字符%c的频率是%f\n", ch[i].ch,(1.0*ch[i].number) / total);
- }
- }
就把每个字符出现的频率加和然后除去自身就好了
接下来我们要实现计算哈夫曼树的构造和编码,相信你已经学过什么是哈夫曼树了,在这里我的程序没有进一步用堆优化,而是直接算出来最小的两个节点,代码如下
- void createhuffmantree(huffmannode huffnode[], int n, character chars[]) //构建哈夫曼树
- {
- int i, j, x1, x2;
- int maxweight = 50000;
- for (i = 0; i < 2 * n - 1; i++)
- {
- huffnode[i].weight = chars[i].number;
- huffnode[i].parent = TOP;
- huffnode[i].left = null;
- huffnode[i].right = null;
- huffnode[i].value = chars[i].ch;
- }//初始化,2*n-1的原因是合并n-1次会出现n-1个节点
- for (i = 0; i < n - 1; i++)//有n个节点就要进行n-1次合并
- {
- maxweight = MAXWEIGHT;
- x1 = x2 = 0;
- for (j = 0; j < n + i; j++)
- {
- if (huffnode[j].parent == -1 && huffnode[j].weight < maxweight)
- {
- maxweight = huffnode[j].weight;
- x1 = j;
- }
- }//找到第一小节点
- maxweight = MAXWEIGHT;
- for (j = 0; j < n + i; j++)
- {
- if (huffnode[j].parent == -1 && huffnode[j].weight < maxweight && j != x1)
- {
- maxweight = huffnode[j].weight;
- x2 = j;
- }
- }//找到第二小节点,多了一个判定条件
- huffnode[x1].parent = n + i;
- huffnode[x2].parent = n + i;//将新节点和最小两个节点连接
- huffnode[n + i].weight = huffnode[x1].weight + huffnode[x2].weight;
- huffnode[n + i].left = x1;
- huffnode[n + i].right = x2;
- }
- }
十分常规的写法,接下来就是计算每个字符的哈夫曼编码,一般都是从叶节点从下向上算,我们用一个循环来做,我们知道,树的根节点是不计入计算的,所以,循环终止的条件就是抵达根节点,代码如下
- void Coding(huffmannode huffnode[], huffmancode huffcode[], int n) //构建字符的哈夫曼编码表
- {
- int father;
- int now, i;
- huffmancode temp;
- int p;
- for (i = 0; i < n; i++)
- {
- huffcode[i].front = MAXBIT - 1;
- now = i;
- father = huffnode[i].parent;
- while (father != TOP)//一直到根节点,根节点不存储编码
- {
- if (now == huffnode[father].left) {
- huffcode[i].bit[huffcode[i].front] = 0;//左子树存储0
- }
- else {
- huffcode[i].bit[huffcode[i].front] = 1;//右子树存储1
- }
- now = father;
- father = huffnode[father].parent;
- huffcode[i].front--;
- }
- ++huffcode[i].front;//多减了一位,一定要有自加一次
- }
- }
接下来就是把文件进行编码了,有了上面的函数,这一步是十分轻松的,代码如下
- void CodingFile(huffmannode huffnode[], huffmancode huffcode[], int n, int& chnum) //对原文本编码并将编码写入文件
- {
- FILE* file1 = NULL;
- FILE* file2 = NULL;
- if ((file1 = fopen("C:\\huff\\word.txt", "r")) == NULL)
- {
- printf("打开word.txt失败!\n");
- exit(0);
- }
- if ((file2 = fopen("C:\\huff\\asc.txt", "w")) == NULL)
- {
- printf("打开asc.txt失败!\n");
- exit(0);
- }
- char ch;
- fscanf(file1, "%c", &ch);
- if (ch != '#')
- {
- for (int i = 0; i < n; i++)
- {
- if (huffnode[i].value == ch)
- {
- for (int j = huffcode[i].front; j < MAXBIT; j++)
- {
- fprintf(file2, "%d", huffcode[i].bit[j]);
- chnum++;
- }
- }
- }
- }
- while (ch != '#') {
- fscanf(file1, "%c", &ch);
- if (ch != '#')
- {
- for (int i = 0; i < n; i++)
- {
- if (huffnode[i].value == ch)
- {
- for (int j = huffcode[i].front; j < MAXBIT; j++)
- {
- fprintf(file2, "%d", huffcode[i].bit[j]);
- chnum++;
- }
- }
- }
- }
- };
- fclose(file1);
- fclose(file2);
- }
解码代码如下
- void Decoding(huffmannode huffnode[], int n)
- {
- FILE* fc = NULL, * fd = NULL;
- if ((fc = fopen("C:\\huff\\asc.txt", "r")) == NULL)
- {
- printf("打开asc.txt失败!\n");
- exit(0);
- }
- if ((fd = fopen("C:\\huff\\outcome.txt", "w")) == NULL)
- {
- printf("打开outcome.txt失败!\n");
- exit(0);
- }
- char nums[100000];
- fscanf(fc, "%s", nums);//因为译码文件没有空格
- int i = 0, j = 0, temp;
- for (i = 0; nums[i] != '\0'; i++);
- while (j < i)
- {
- temp = 2 * n - 2;//找到根节点
- while (huffnode[temp].left != null && huffnode[temp].right != null)
- {
- if (nums[j] == '0')
- temp = huffnode[temp].left;
- else
- temp = huffnode[temp].right;
- j++;
- }
- fprintf(fd, "%c", huffnode[temp].value);
- }
- fclose(fc);
- fclose(fd);
- }
计算压缩率
- void Calculate_yasuo(character ch[], int n, int chnum) //计算压缩率
- {
- int total = 0;
- for (int i = 0; i < n; i++)
- {
- total =total+ ch[i].number;
- }
- printf("=>压缩成功,压缩比%f%%", 1.0*chnum /(8*total * 100));//除8是因为在txt文档中,一个字符占8位
- }
在这里加上两个%%是因为要进行转义
源代码如下
- #include<iostream>
- #include<stdio.h>
- #include<stdlib.h>
- using namespace std;
- #define MAXWEIGHT 10000
- #define MAXBIT 100
- #define TOP -1
- #define null -1
- struct character
- {
- char ch;
- int number = 0;
- }; //文件中出现的字符和其出现次数
- struct huffmannode
- {
- int weight;
- int parent;
- int left;
- int right;
- char value;
- int i;
- }; //哈夫曼节点
- struct huffmancode
- {
- int bit[MAXBIT];
- int front;
- }; //记录每个字符哈夫曼编码
- void tongji(character chars[], int& n) //统计原文本中出现的字符及其使用频率
- {
- FILE* file = NULL;
- if ((file = fopen("C:\\huff\\word.txt", "r")) == NULL)
- {
- printf("打开word.txt失败!\n");
- exit(0);
- }//如果没有打开,就输出失败,然后直接结束程序-后路
- char ch;
- int i = 0, j = 0, judge = 0;;
- fscanf(file, "%c", &ch);
- chars[i].ch = ch;
- chars[i].number++;
- i++;
- while (ch != '#') //从文件读入文本,以#标记结束
- {
- fscanf(file, "%c", &ch);
- judge = 0;
- if (ch != '#')
- {
- for (j = 0; j < i; j++)
- {
- if (ch == chars[j].ch) //如果读入的字符已被记录,则将出现次数+1
- {
- chars[j].number++;
- judge = 1;
- break;
- }
- }
- if (judge == 0)
- {
- chars[i].ch = ch;
- chars[i].number++;
- i++;
- }//如果没有出现在已知,那么就创建新的
- }
- }
- n = i;//记录文件中出现的字母数目
- fclose(file);
- }
- void createhuffmantree(huffmannode huffnode[], int n, character chars[]) //构建哈夫曼树
- {
- int i, j, x1, x2;
- int maxweight = 50000;
- for (i = 0; i < 2 * n - 1; i++)
- {
- huffnode[i].weight = chars[i].number;
- huffnode[i].parent = TOP;
- huffnode[i].left = null;
- huffnode[i].right = null;
- huffnode[i].value = chars[i].ch;
- }//初始化,2*n-1的原因是合并n-1次会出现n-1个节点
- for (i = 0; i < n - 1; i++)//有n个节点就要进行n-1次合并
- {
- maxweight = MAXWEIGHT;
- x1 = x2 = 0;
- for (j = 0; j < n + i; j++)
- {
- if (huffnode[j].parent == -1 && huffnode[j].weight < maxweight)
- {
- maxweight = huffnode[j].weight;
- x1 = j;
- }
- }//找到第一小节点
- maxweight = MAXWEIGHT;
- for (j = 0; j < n + i; j++)
- {
- if (huffnode[j].parent == -1 && huffnode[j].weight < maxweight && j != x1)
- {
- maxweight = huffnode[j].weight;
- x2 = j;
- }
- }//找到第二小节点,多了一个判定条件
- huffnode[x1].parent = n + i;
- huffnode[x2].parent = n + i;//将新节点和最小两个节点连接
- huffnode[n + i].weight = huffnode[x1].weight + huffnode[x2].weight;
- huffnode[n + i].left = x1;
- huffnode[n + i].right = x2;
- }
- }
- void Coding(huffmannode huffnode[], huffmancode huffcode[], int n) //构建字符的哈夫曼编码表
- {
- int father;
- int now, i;
- huffmancode temp;
- int p;
- for (i = 0; i < n; i++)
- {
- huffcode[i].front = MAXBIT - 1;
- now = i;
- father = huffnode[i].parent;
- while (father != TOP)//一直到根节点,根节点不存储编码
- {
- if (now == huffnode[father].left) {
- huffcode[i].bit[huffcode[i].front] = 0;//左子树存储0
- }
- else {
- huffcode[i].bit[huffcode[i].front] = 1;//右子树存储1
- }
- now = father;
- father = huffnode[father].parent;
- huffcode[i].front--;
- }
- ++huffcode[i].front;//多减了一位,一定要有自加一次
- }
- }
- void CodingFile(huffmannode huffnode[], huffmancode huffcode[], int n, int& chnum) //对原文本编码并将编码写入文件
- {
- FILE* file1 = NULL;
- FILE* file2 = NULL;
- if ((file1 = fopen("C:\\huff\\word.txt", "r")) == NULL)
- {
- printf("打开word.txt失败!\n");
- exit(0);
- }
- if ((file2 = fopen("C:\\huff\\asc.txt", "w")) == NULL)
- {
- printf("打开asc.txt失败!\n");
- exit(0);
- }
- char ch;
- fscanf(file1, "%c", &ch);
- if (ch != '#')
- {
- for (int i = 0; i < n; i++)
- {
- if (huffnode[i].value == ch)
- {
- for (int j = huffcode[i].front; j < MAXBIT; j++)
- {
- fprintf(file2, "%d", huffcode[i].bit[j]);
- chnum++;
- }
- }
- }
- }
- while (ch != '#') {
- fscanf(file1, "%c", &ch);
- if (ch != '#')
- {
- for (int i = 0; i < n; i++)
- {
- if (huffnode[i].value == ch)
- {
- for (int j = huffcode[i].front; j < MAXBIT; j++)
- {
- fprintf(file2, "%d", huffcode[i].bit[j]);
- chnum++;
- }
- }
- }
- }
- };
- fclose(file1);
- fclose(file2);
- }
- void Decoding(huffmannode huffnode[], int n)
- {
- FILE* fc = NULL, * fd = NULL;
- if ((fc = fopen("C:\\huff\\asc.txt", "r")) == NULL)
- {
- printf("打开asc.txt失败!\n");
- exit(0);
- }
- if ((fd = fopen("C:\\huff\\outcome.txt", "w")) == NULL)
- {
- printf("打开outcome.txt失败!\n");
- exit(0);
- }
- char nums[100000];
- fscanf(fc, "%s", nums);//因为译码文件没有空格
- int i = 0, j = 0, temp;
- for (i = 0; nums[i] != '\0'; i++);
- while (j < i)
- {
- temp = 2 * n - 2;//找到根节点
- while (huffnode[temp].left != null && huffnode[temp].right != null)
- {
- if (nums[j] == '0')
- temp = huffnode[temp].left;
- else
- temp = huffnode[temp].right;
- j++;
- }
- fprintf(fd, "%c", huffnode[temp].value);
- }
- fclose(fc);
- fclose(fd);
- }
- void printout(character ch[], int n)
- {
- int total = 0;
- for (int i = 0; i < n; i++)
- {
- total = total + ch[i].number;
- }
- for (int i = 0; i < n; i++)
- {
- printf("字符%c的频率是%f\n", ch[i].ch,(1.0*ch[i].number) / total);
- }
- }
- void Calculate_yasuo(character ch[], int n, int chnum) //计算压缩率
- {
- int total = 0;
- for (int i = 0; i < n; i++)
- {
- total =total+ ch[i].number;
- }
- printf("=>压缩成功,压缩比%f%%", 1.0*chnum /(8*total * 100));//除8是因为在txt文档中,一个字符占8位
- }
-
- int main()
- {
- huffmannode huffnode[500];
- character chars[500];
- huffmancode huffcode[500];
- int n = 0, chnum = 0;
- cout << "********请选择:1 压缩 2 解压 ********|" << endl;
- cout << "!!!!!注意!!!!! |" << endl;
- cout << "****系统只支持编码或者解压分开进行****|" << endl;
- cout << "************输入0退出系统*************|" << endl;
- cout << "---------------------------------------" << endl;
- int x; cin >> x;
- while (x != 0) {
- if (x == 1) {
- cout << "系统将自动压缩你保存在本地的word.txt文件" << endl;
- tongji(chars, n);
- printout(chars, n);
- createhuffmantree(huffnode, n, chars);
- Coding(huffnode, huffcode, n);
- CodingFile(huffnode, huffcode, n, chnum);
- Calculate_yasuo(chars, n, chnum);
- cout << "------------------------------------------" << endl;
- cout << "********请选择:1 压缩 2 解压 ********|" << endl;
- cout << "!!!!!注意!!!!! |" << endl;
- cout << "****系统只支持编码或者解压分开进行****|" << endl;
- cout << "************输入0退出系统*************|" << endl;
- cout << "---------------------------------------" << endl;
- }
- else {
- cout << "系统将自动为您解压您保存在本地的asc.txt文件" << endl;
- tongji(chars, n);
- Decoding(huffnode, n);
- cout << "=>解压成功,内容已保存在本地outcome.txt文件" << endl;
- cout << "------------------------------------------" << endl;
- cout << "********请选择:1 压缩 2 解压 ********|" << endl;
- cout << "!!!!!注意!!!!! |" << endl;
- cout << "****系统只支持编码或者解压分开进行****|" << endl;
- cout << "************输入0退出系统*************|" << endl;
- cout << "---------------------------------------" << endl;
- }
- cin >> x;
- if (x == 0) {
- cout << "您已成功退出系统!";
- exit(0);
- }
- }//进行循环,保证能编码和译码都能进行
- return 0;
- }
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。