赞
踩
1、词法分析功能
输入:所给文法的源程序字符串。
输出:二元组(syn,token或sum)构成的序列。其中,
syn为单词种别码。
Token为存放的单词自身字符串。
Sum为整型常量。
具体实现时,可以将单词的二元组用结构进行处理。
2、待分析的C语言子集的词法
1)关键字
main if then while do static int double struct break else long switch case typedef char return const float short continue for void default sizeof
所有的关键字都是小写。
2)运算符和界符
+ - * / : < <> <= > >= = ; ( ) #
3)其他标记ID和NUM
通过以下正规式定义其他标记:
ID→letter(letter|digit)*
NUM→digit digit*
letter→a|…|z|A|…|Z
digit→0|…|9…
4)空格由空白、制表符和换行符组成
空格一般用来分隔ID、NUM、专用符号和关键字,词法分析阶段通常被忽略。
3、各种单词符号对应的种别码
表1 各种单词符号的种别码
单词符号 种别码 单词符号 种别码 单词符号 种别码 单词符号 种别码 单词符号 种别码
# 0 else 11 for 22 == 33 ) 44
main 1 long 12 void 23 < 34 { 45
if 2 switch 13 default 24 <> 35 } 46
then 3 case 14 sizeof 25 <= 36 , 47
while 4 typedef 15 ID 26 > 37
do 5 char 16 NUM 27 >= 38
static 6 return 17 + 28 = 39
int 7 const 18 - 29 [ 40
double 8 float 19 * 30 ] 41
struct 9 short 20 / 31 ; 42
break 10 continue 21 ** 32 ( 43
上述为一些基本符号,可以自己另加。
状态转换图:
代码:
- #include <iostream>
- #include <stdio.h>
- #include <stdlib.h>
- #include <cstring>
- #include <string>
- using namespace std;
-
- FILE *fp; // 文件指针,处理源文件
- char *ScanBuffer; // 扫描缓冲区,一分为二,每次一半存放预处理后的字符串
- int capacity; // 扫描缓冲区容量
- int BufferFlag; // 状态标志,0 表示使用缓冲区的左半区,1 表示使用右半区
- int PreprocessFlag; // 预处理的状态标志
- string rwtab[25]={"main","if","then","while","do"," static",
- "int"," double","struct","break","else","long","switch",
- "case","typedef","char","return","const","float","short",
- "continue","for","void","default","sizeof"};
-
- // init fp, ScanBuffer, BufferFlag,SP,EP,PreprocessFlag
- void init(){
- fp = fopen("test.txt","r");
- if(fp == NULL) exit(0);
- capacity = 120;
- ScanBuffer = new char[capacity]; // 缓冲区分配长度 :120。
- memset(ScanBuffer,0,sizeof(ScanBuffer));
- BufferFlag = 0;
- PreprocessFlag = 0;
- }
-
- void close(){
- fclose(fp);
- delete ScanBuffer;
- }
-
- void Preprocess(){
- //参见词法分析一
- }
-
-
- /*
- 分析器变量
- SP: 分析器的起点指示器
- EP: 分析器的搜索指示器
- syn: 种别编码
- token: 存放一个处理的字符串
- sum: 常量
- status:状态
- */
-
- int SP = 0; int EP = 0;
- int syn = -1; string token = "";
- int sum = 0; int status = 0;
-
- // 字母
- int IsLetter(char c){
- if(c >= 'a' && c <= 'z' || c >= 'A' && c <= 'Z') return 1;
- return 0;
- }
-
- //数字
- int IsDigit(char c){
- if(c >= '0' && c <= '9') return 1;
- return 0;
- }
-
- // 关键字或者是变量
- void Key(){
- token = "";
- if(SP < EP){
- for(int i = SP; i < EP; i++) token.push_back(ScanBuffer[i]);
- }else{
- for(int i = SP; i < capacity; i++) token.push_back(ScanBuffer[i]);
- for(int i = 0; i < EP; i++) token.push_back(ScanBuffer[i]);
- }
- for(int i = 0; i < 25; i++) if(token.compare(rwtab[i]) == 0) {syn = i+1; return;}
- syn = 26; return;
- }
-
- void Number(){
- sum = 0;
- if(SP < EP){
- for(int i = SP; i < EP; i++) sum = sum * 10 + ScanBuffer[i] - '0';
- }else{
- for(int i = SP; i < capacity; i++) sum = sum * 10 + ScanBuffer[i] - '0';
- for(int i = 0; i < EP; i++) sum = sum * 10 + ScanBuffer[i] - '0';
- }
- syn = 27;
- }
-
- // 不合法变量的处理,例如以数字开头的变量
- void InvalidToken(){
- token = "";
- if(SP < EP){
- for(int i = SP; i < EP; i++) token.push_back(ScanBuffer[i]);
- }else{
- for(int i = SP; i < capacity; i++) token.push_back(ScanBuffer[i]);
- for(int i = 0; i < EP; i++) token.push_back(ScanBuffer[i]);
- }
- while(IsLetter(ScanBuffer[EP]) || IsDigit(ScanBuffer[EP])) {token.push_back(ScanBuffer[EP]); EP++;}
- }
-
- // 单个字符 ,例如‘<’
- void OneCharacter(){
- switch(status){
- case 11: syn = 39; break;
- case 16: syn = 30; break;
- case 22: syn = 34; break;
- case 24: syn = 37; break;
- }
- }
- // 双字符 ,例如‘<=’
- void TwoCharacter(){
- switch(status){
- case 12: syn = 33; break;
- case 17: syn = 32; break;
- case 20: syn = 35; break;
- case 21: syn = 36; break;
- case 25: syn = 38; break;
- }
- }
-
- void CharacterMatch(char ch){
- switch(ch){
- case ' ': break;
- case ';': {status = 5; syn = 42; break;}
- case '(': {status = 6; syn = 43; break;}
- case ')': {status = 7; syn = 44; break;}
- case '[': {status = 8; syn = 40; break;}
- case ']': {status = 9; syn = 41; break;}
- case '{': {status = 27;syn = 45; break;}
- case '}': {status = 28;syn = 46; break;}
- case ',': {status = 29;syn = 47; break;}
- case '+': {status = 13;syn = 28; break;}
- case '-': {status = 14;syn = 29; break;}
- case '/': {status = 18;syn = 31; break;}
- case '#': {status = 26;syn = 0; break;}
- case '=': {status = 10; break;}
- case '*': {status = 15; break;}
- case '<': {status = 19; break;}
- case '>': {status = 23; break;}
- default: {status = -1; break;}
- }
- }
-
- void StatusHandle(){
- switch(status){
- case -2: {
- InvalidToken(); cout<<"invalid token: "<<token<<endl; status = 0; SP = EP;break;
- }
- case -1: {
- printf("invalid character: %c\n",ScanBuffer[EP]); EP++; status = 0; SP = EP;break;
- }
- case 1: case 3: case 10: case 15: case 19: case 23:{
- EP++; break;
- }
- case 0:{
- EP++; SP = EP; break;
- }
- case 2:{
- Key(); cout<<"("<<syn<<","<<token<<")"<<endl; SP = EP; status = 0; break;
- }
- case 4:{
- Number();cout<<"("<<syn<<","<<sum<<")"<<endl; SP = EP; status = 0; break;
- }
- case 5:case 6:case 7:case 8:case 9:case 13:case 14:case 18:case 26:case 27:case 28:case 29:{
- cout<<"("<<syn<<","<<ScanBuffer[SP]<<")"<<endl; EP++; SP = EP; status = 0; break;
- }
- case 11: case 16: case 22: case 24:{
- OneCharacter(); cout<<"("<<syn<<","<<ScanBuffer[SP]<<")"<<endl; SP++; EP = SP; status = 0; break;
- }
- case 12: case 17: case 20: case 21: case 25:{
- TwoCharacter();
- cout<<"("<<syn<<","<<ScanBuffer[SP]<<ScanBuffer[EP]<<")"<<endl;
- EP++; SP = EP;
- status = 0; break;
- }
- }
- }
-
- // 状态转换图
- void Analyse(char ch){
- switch(status){
- case 0:{
- if(IsLetter(ch)) status = 1;
- else if(IsDigit(ch)) status = 3;
- else CharacterMatch(ch);
- break;
- }
- case 1:{
- if(IsLetter(ch) || IsDigit(ch)) break;
- else status = 2; break;
- }
- case 3:{
- if(IsDigit(ch)) break;
- else if(IsLetter(ch)){
- status = -2; break;
- }else {
- status = 4; break;
- }
- }
- case 10:{
- if(ch == '=') {status = 12; break;}
- else status = 11; break;
- }
- case 15:{
- if(ch == '*') {status = 17; break;}
- else status = 16; break;
- }
- case 19:{
- if(ch == '>') {status = 20; break;}
- else if(ch == '='){status = 21; break;}
- else status = 23;break;
- }
- case 23:{
- if(ch == '=') {status = 25; break;}
- else status = 24; break;
- }
- }
- StatusHandle();
- }
-
- void Analyzer(){
- while(!feof(fp)){
- Preprocess(); BufferFlag = BufferFlag ^ 1;
- if(BufferFlag){
- EP = 0; // 使用左半缓冲区
- if(SP == capacity) SP = 0; // 防止SP越界
- while(ScanBuffer[EP] != -1 && EP < capacity/2){
- Analyse(ScanBuffer[EP]);
- }
- }else{
- while(ScanBuffer[EP] != -1 && EP < capacity){
- Analyse(ScanBuffer[EP]);
- }
- }
- }
- }
-
- int main(){
- init();
- Analyzer();
- close();
- return 0;
- }
测试用例:
// 单行注释
/*
多行注释
*/
int ArraySum(int **a, int m, int n){
int 9sum = 0;
for(int i = 0; i < m; i = i + 1){
for(int j = 0; j < n; j = j + 1){
sum = sum + a[i][j];
}
}
return sum;
}
测试结果:
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。