赞
踩
编写一个程序,对使用 C-- 语言书写的源代码进行词法分析,并打印分析结果。学习词法分析工具 Flex 的使用方法,并使用 C 语言完成程序。
编写 flex 文件,规范正则表达式进行单词匹配:
第一部分:正则表达式
%option yylineno

/* Basic character classes reused by the token patterns below. */
digit [0-9]
letter [a-zA-Z]
unsignedint [1-9]{digit}*

/* SPACE is a single blank; LF is newline, carriage return, form feed, vertical tab or tab. */
SPACE [ ]
LF [\n\r\f\v\t]

/* Floating-point literals: optional sign, optional integer part, a dot, optional fraction and exponent. */
FLOAT [+-]?({digit}+)?\.{digit}+?([eE][+-]?(0|[0-9]*))?
/*
 * Malformed floats: an exponent followed by a dot (1e2.5) or by letters (2er, 15e).
 * At least one leading digit is required; with an optional mantissa, all-letter
 * identifiers starting with e/E (each, end, e) and member accesses such as "e."
 * were matched here and reported as illegal floats.
 */
ILLEGALFLOAT ([+-]?{digit}+[eE][+-]?{digit}*\.{digit}*)|([+-]?{digit}+[eE]{letter}*)

/* Identifiers: a letter or underscore followed by letters, digits or underscores. */
ID ({letter}|_)({letter}|_|{digit})*

/* Punctuation and operators. */
SEMI ;
COMMA ,
ASSIGNOP =
RELOP (>|<|>=|<=|==|!=)
PLUS \+
MINUS \-
STAR \*
DIV \/
AND &&
OR \|\|
DOT \.
NOT !
TYPE (int|float)
LP \(
RP \)
LB \[
RB \]
LC \{
RC \}
KEYWORD if|else|struct|return|while

/* Integer literals (octal, hexadecimal, decimal) and their malformed variants. */
INT8 [+-]?0(0|([1-7][0-7]*))
ILLINT8 0([0-7]*)?[8-9]+{digit}*
INT16 [+-]?(0(x|X))(0|([1-9A-Fa-f][0-9A-Fa-f]*))
ILLINT16 [+-]?(0(x|X))(0|([1-9A-Fa-f][0-9A-Fa-f]*))?([g-zG-Z]+)({digit}|{letter})*
INT [+-]?(0|{unsignedint})

/*
 * Comments: "//" to end of line, or a block comment.
 * The block form is the non-greedy-equivalent pattern: it stops at the first
 * terminator and, because negated classes match newlines, spans lines
 * regardless of the line-ending style. The previous "(.|\r\n)*" body could
 * not cross a bare \n and could run on to the last terminator in the file.
 */
NOTE (\/\/.*)|(\/\*([^*]|\*+[^*/])*\*+\/)
第二部分:用户定义部分
%{
/* Column position reported in token and error messages; starts at 1 and is updated by the rule actions. */
int count = 1;
%}
count 用于记录扫描的列数,以便后续输出错误位置。
第三部分:匹配操作
%%
  /*
   * Token rules. Each action prints the token class, the line (yylineno),
   * the column (count) and the lexeme, then advances the column by the
   * lexeme length. Rule order matters: on equal-length matches the earlier
   * rule wins, so KEYWORD and TYPE must stay ahead of ID, and DOT ahead of FLOAT.
   */
{KEYWORD} {printf("KEYWORD at line %d,char %d:%s\n", yylineno,count,yytext);count+=yyleng;}
{DOT} {printf("DOT at line %d,char %d:%s\n", yylineno,count,yytext);count+=yyleng;}
{FLOAT} {printf("FLOAT at line %d,char %d:%s\n", yylineno,count,yytext);count+=yyleng;}
{ILLEGALFLOAT} {printf("Error Type A at line %d,char %d: Illegal float number: '%s'.\n", yylineno,count,yytext);count+=yyleng;}
{INT16} {printf("INT16 at line %d,char %d:%s\n", yylineno,count, yytext);count+=yyleng;}
{ILLINT16} {printf("Error Type A at line %d,char %d: Illegal hexadecimal number: '%s'.\n", yylineno,count, yytext);count+=yyleng;}
{INT8} {printf("INT8 at line %d,char %d:%s\n", yylineno,count, yytext);count+=yyleng;}
{ILLINT8} {printf("Error Type A at line %d,char %d: Illegal octal number: '%s'.\n", yylineno,count,yytext);count+=yyleng;}
{INT} {printf("INT data at line %d,char %d:%s\n", yylineno,count,yytext);count+=yyleng;}
{NOTE} {
    int i;
    printf("NOTE at line %d,char %d:%s\n", yylineno,count,yytext);
    /* A comment may span several lines: restart the column after each
       newline instead of blindly adding the whole lexeme length. */
    for (i = 0; i < yyleng; i++) {
        if (yytext[i] == '\n') {
            count = 1;
        } else {
            count++;
        }
    }
}
{SEMI} {printf("SEMI at line %d,char %d:%s\n", yylineno,count,yytext);count+=yyleng;}
{COMMA} {printf("COMMA at line %d,char %d:%s\n", yylineno,count,yytext);count+=yyleng;}
{ASSIGNOP} {printf("ASSIGNOP at line %d,char %d:%s\n", yylineno,count,yytext);count+=yyleng;}
{RELOP} {printf("RELOP at line %d,char %d:%s\n", yylineno,count,yytext);count+=yyleng;}
{PLUS} {printf("PLUS at line %d,char %d:%s\n", yylineno,count,yytext);count+=yyleng;}
{MINUS} {printf("MINUS at line %d,char %d:%s\n", yylineno,count,yytext);count+=yyleng;}
{STAR} {printf("STAR at line %d,char %d:%s\n", yylineno,count,yytext);count+=yyleng;}
{DIV} {printf("DIV at line %d,char %d:%s\n", yylineno,count,yytext);count+=yyleng;}
{AND} {printf("AND at line %d,char %d:%s\n", yylineno,count,yytext);count+=yyleng;}
{OR} {printf("OR at line %d,char %d:%s\n", yylineno,count,yytext);count+=yyleng;}
{NOT} {printf("NOT at line %d,char %d:%s\n", yylineno,count,yytext);count+=yyleng;}
{TYPE} {printf("TYPE at line %d,char %d:%s\n", yylineno,count,yytext);count+=yyleng;}
{LP} {printf("LP at line %d,char %d:%s\n", yylineno,count,yytext);count+=yyleng;}
{RP} {printf("RP at line %d,char %d:%s\n", yylineno,count,yytext);count+=yyleng;}
{LB} {printf("LB at line %d,char %d:%s\n", yylineno,count,yytext);count+=yyleng;}
{RB} {printf("RB at line %d,char %d:%s\n", yylineno,count,yytext);count+=yyleng;}
{LC} {printf("LC at line %d,char %d:%s\n", yylineno,count,yytext);count+=yyleng;}
{RC} {printf("RC at line %d,char %d:%s\n", yylineno,count,yytext);count+=yyleng;}
{ID} {printf("ID at line %d,char %d:%s\n", yylineno,count,yytext);count+=yyleng;}
{SPACE} {count++;}
{LF} {
    /* LF also matches a tab, which stays on the same line: advance the
       column for it and reset the column only for real line-control characters. */
    if (yytext[0] == '\t') {
        count++;
    } else {
        count = 1;
    }
}
. {printf("Error Type A at line %d,char %d: Mysterious character: '%s'.\n", yylineno,count,yytext);count+=yyleng;}
%%
这一部分规定了匹配到给定的正则表达式后进行的操作。大部分为打印输出,小部分只更新用户定义的列计数变量 count。例如匹配到 SPACE 时,只将列号 count 加 1,不做其他任何操作。
main函数部分
/*
 * Entry point of the scanner.
 *
 * Opens the file named by argv[1] as the lexer input (yyin) and runs
 * yylex() until it returns 0, so that every token is reported by the
 * rule actions.
 *
 * Returns 0 on success, 1 when no input file is given or it cannot be opened.
 */
int main(int argc, char* argv[]) {
    if (argc < 2) {
        /* Previously a missing argument exited silently without scanning anything. */
        fprintf(stderr, "Usage: %s <source-file>\n", argc > 0 ? argv[0] : "scanner");
        return 1;
    }

    yyin = fopen(argv[1], "r");
    if (yyin == NULL) {
        perror(argv[1]);
        return 1;
    }

    while (yylex() != 0) {
        /* All work happens inside the rule actions. */
    }

    /* Release the input file once scanning has finished. */
    fclose(yyin);
    return 0;
}
main 函数读取文件,依次匹配每一个 token,并进行相应的打印输出。
0547 089 0x5c4ad 0X345 0X1D7E 0x4m4
1.23 1.3e0 13.5e9 2.e-23 3. .08 2er 15e 1e2.5
// note1
/* this
is a long long comment
*/
h = 5 / 2 // note2
}
输入指令:
>>> flex lizi.l
>>> gcc lex.yy.c -lfl -o scanner
得到可执行程序 scanner 后,就可以用它对源文件进行词法分析:
>>> ./scanner text.cmm
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。