本次实验的题目就是分析一个文本文件中各个词出现的频率,并把出现频率最高的10个单词打印出来。
拿到本次实验题目是在第一周上软件工程课的时候,看到这个题目,就想起来我们之前做过一些统计单词数量的程序。由于之前有点事,所以实验也没有提前准备,是昨天(2.28下午)上机的时候才开始的。昨天做到了可以在dos下面输入英文句子的程度,回到宿舍继续弄,但是没进展,主要是卡在从文本文件中读取时是一个字母一个字母地读取,而在dos下输入的时候用的是string类型,也就是在char向string转换的时候搞不明白了。最后是定义了一个bool类型的函数bool readFileToString()。
// Reads the whole file `file_name` in binary mode and appends its bytes to
// `fileData`.
//
// Returns true when the complete file was read. Returns false when the file
// cannot be opened, its size cannot be determined, or fewer bytes than
// expected could be read; in the short-read case the bytes that were read are
// still appended and a diagnostic is written to std::cout.
bool readFileToString(std::string file_name, std::string& fileData)
{
    std::ifstream file(file_name.c_str(), std::ifstream::binary);
    if (!file)
    {
        return false;
    }

    // Determine the file size by seeking to the end.
    file.seekg(0, std::ios::end);
    const std::streamoff file_size = file.tellg();
    if (file_size < 0)
    {
        // tellg() reports failure as -1; never size a buffer from it.
        return false;
    }
    file.seekg(0, std::ios::beg);

    // A std::string owns the buffer, so every return path releases it.
    std::string buffer(static_cast<std::string::size_type>(file_size), '\0');
    if (file_size > 0)
    {
        file.read(&buffer[0], static_cast<std::streamsize>(file_size));
    }

    if (!file)
    {
        std::cout << "error: only " << file.gcount() << " could be read";
        // Append by explicit length so embedded NUL bytes are not lost.
        fileData.append(buffer, 0,
                        static_cast<std::string::size_type>(file.gcount()));
        return false;
    }

    fileData.append(buffer);
    return true;
}
本程序中主要用到了文件的读取,定义了class Words类,class WordList类和class Text类,并用到了排序。
具体的程序代码如下:
#include <iostream>
#include <string>
#include <ctype.h>
#include <iomanip>
#include <fstream>
#include <algorithm>
#include <cstdlib>
#include <vector>
using namespace std;

// Word-frequency counter: reads a text file, counts every run of alphabetic
// characters as a word (case-sensitive), and prints the ten most frequent.

// One node of the singly linked word list: a distinct word and its count.
class Words
{
public:
    Words(const string& str) : count(1), word(str), next(NULL) {}

    int count;    // number of occurrences recorded for this word
    string word;  // the word text
    Words *next;  // next node in the list, NULL at the tail
};

// Singly linked list of distinct words, in order of first appearance.
// `first` is a sentinel head node that holds no real word.
class WordList
{
public:
    // Records one occurrence of `word`, appending a node if it is new.
    void AddWord(const string& word);
    // Returns whether `word` is already stored. NOTE: this also counts the
    // lookup as one occurrence (increments the total and, when found, the
    // word's own count); AddWord relies on that.
    bool WordExist(const string& word);
    // Prints the (up to) ten most frequent words with count and frequency.
    void WordPrint() const;
    int getLength() const { return length; }  // number of distinct words
    int getTotal() const { return total; }    // number of words recorded
    WordList();
    ~WordList();

private:
    // The list owns its nodes through raw pointers, so copying would free
    // them twice. Declared but intentionally not implemented.
    WordList(const WordList&);
    WordList& operator=(const WordList&);

    Words *first;
    Words *last;
    int length;
    int total;
};

WordList::WordList()
{
    first = new Words(" ");
    last = first;
    length = 0;
    total = 0;
}

WordList::~WordList()
{
    Words *p = first;
    while (p != NULL)
    {
        Words *doomed = p;
        p = p->next;
        delete doomed;
    }
}

void WordList::AddWord(const string& word)
{
    // WordExist already bumps the counters for a known word.
    if (WordExist(word))
    {
        return;
    }
    Words *node = new Words(word);
    last->next = node;
    last = node;
    length++;
}

bool WordList::WordExist(const string& word)
{
    total++;
    for (Words *p = first->next; p != NULL; p = p->next)
    {
        if (p->word == word)
        {
            p->count++;
            return true;
        }
    }
    return false;
}

// Ordering for std::stable_sort: higher counts first.
static bool MoreFrequent(const Words *a, const Words *b)
{
    return a->count > b->count;
}

void WordList::WordPrint() const
{
    int n = 10;
    if (length < n)
    {
        n = length;
        cout << "\n不同的单词不足10个\n";
    }
    cout << setw(20) << setiosflags(ios::left) << "单词"
         << setw(20) << setiosflags(ios::left) << "出现次数"
         << setw(20) << setiosflags(ios::left) << "频率" << endl;

    // Rank pointers to the nodes instead of zeroing counts in place, so the
    // list is left intact. A stable sort keeps first-appearance order among
    // words with equal counts.
    vector<const Words*> ranked;
    ranked.reserve(length);
    for (const Words *p = first->next; p != NULL; p = p->next)
    {
        ranked.push_back(p);
    }
    stable_sort(ranked.begin(), ranked.end(), MoreFrequent);

    for (int i = 0; i < n; i++)
    {
        const Words *w = ranked[i];
        cout << setw(20) << setiosflags(ios::left) << w->word
             << setw(20) << setiosflags(ios::left) << w->count
             << setw(20) << setiosflags(ios::left) << 1.0 * w->count / total
             << endl;
    }
}

// Drives the program: load a file, split it into words, report the result.
class Text
{
    string txt;        // raw contents of the input file
    WordList mywords;  // words counted so far
public:
    void PutIn();   // ask for a path and load the file into txt
    void PutOut();  // print the totals and the most frequent words
    void Run();     // split txt into words and count them
};

// Reads the whole file `file_name` in binary mode and appends its bytes to
// `fileData`.
//
// Returns true when the complete file was read. Returns false when the file
// cannot be opened, its size cannot be determined, or fewer bytes than
// expected could be read; in the short-read case the bytes that were read are
// still appended and a diagnostic is written to cout.
bool readFileToString(string file_name, string& fileData)
{
    ifstream file(file_name.c_str(), std::ifstream::binary);
    if (!file)
    {
        return false;
    }

    // Determine the file size by seeking to the end.
    file.seekg(0, ios::end);
    const streamoff file_size = file.tellg();
    if (file_size < 0)
    {
        // tellg() reports failure as -1; never size a buffer from it.
        return false;
    }
    file.seekg(0, ios::beg);

    // A string owns the buffer, so every return path releases it.
    string buffer(static_cast<string::size_type>(file_size), '\0');
    if (file_size > 0)
    {
        file.read(&buffer[0], static_cast<streamsize>(file_size));
    }

    if (!file)
    {
        std::cout << "error: only " << file.gcount() << " could be read";
        // Append by explicit length so embedded NUL bytes are not lost.
        fileData.append(buffer, 0,
                        static_cast<string::size_type>(file.gcount()));
        return false;
    }

    fileData.append(buffer);
    return true;
}

void Text::PutIn()
{
    // A std::string has no fixed capacity, so a long path cannot overflow.
    string infile;
    cout << "输入读入文件的路径:" << endl;
    cin >> infile;

    if (!readFileToString(infile, txt))
    {
        cout << "文件无法打开!!\n";
        exit(EXIT_FAILURE);
    }
}

void Text::Run()
{
    const string::size_type len = txt.length();
    string::size_type i = 0;
    while (i < len)
    {
        // isalpha requires a value representable as unsigned char; the cast
        // keeps bytes >= 0x80 from being passed as negative ints.
        while (i < len && !isalpha(static_cast<unsigned char>(txt[i])))
        {
            i++;
        }

        const string::size_type start = i;
        while (i < len && isalpha(static_cast<unsigned char>(txt[i])))
        {
            i++;
        }

        // Testing for a non-empty word (not "more input remains") means a
        // word that ends exactly at end-of-file is still counted.
        if (i > start)
        {
            mywords.AddWord(txt.substr(start, i - start));
        }
    }
}

void Text::PutOut()
{
    cout << "*****************************************" << endl;
    cout << "文件中共" << mywords.getTotal() << " 个单词,其中"
         << mywords.getLength() << " 个不同的单词" << endl;
    cout << "*****************************************" << endl;
    cout << "\n出现频率最高的10个单词: \n";
    mywords.WordPrint();
}

int main()
{
    Text mytest;
    mytest.PutIn();
    mytest.Run();
    mytest.PutOut();
    system("pause");  // keeps the console window open (Windows shell command)
    return 0;
}