当前位置:   article > 正文

【编译原理】用 C语言 编写的 C语言 词法分析器_词法分析器c语言编写

词法分析器c语言编写

1.目标:

用C语言编写一个C语言源程序的词法分析器(标题收回)

2.要求

[1] 基本要求:识别关键字、运算符、界限符、常量(布尔型、整型)、标识符;
[2] 扩展要求:常量(浮点型)、注释、错误处理。
3.对照表

 

 4.代码(废话少说)

头文件 

#include "gets.h"

gets.h 是我闲暇时自己写的头文件,集合了一些我经常用到的功能;本文的代码只用到了其中负责读取文件的 getfdstr 函数,其实现如下:

  1. char getfdstr(char filename[],char data[])
  2. {
  3. int size = 0;
  4. char letter[Max] = "";
  5. char w;
  6. //int i,j;
  7. int length = 0;
  8. freopen(filename,"r",stdin);
  9. while(cin >> w)
  10. {
  11. if (w != ' ')
  12. {
  13. letter[length] = w;
  14. length++;
  15. }
  16. }
  17. letter[length] = '\0';
  18. size = strlen(letter);
  19. char *result = new char[size];
  20. strcpy_s(result,size+1,letter);
  21. strcpy_s(data,size+1,result);
  22. return *result;
  23. }

正片开始

  1. #include <iostream>
  2. #include <stdio.h>
  3. #include <string.h>
  4. #include <conio.h>
  5. #include <windows.h>
  6. #include "gets.h"
  #define Max 3068  // capacity of the global character buffers below
  using namespace std;
  // Values 0-3 are the character classes returned by wordtype();
  // values 4-5 are the error kinds understood by error().
  enum
  {
      UNDERLINE = 0,       // underscore
      LETTER = 1,          // letter
      NUMBER = 2,          // digit
      SYMBOL = 3,          // any other character
      SYMBOLERROR = 4,     // unknown symbol
      IDENTIFIERERROR = 5  // malformed identifier
  };
  char data[Max];           // source text to scan, filled by getfdstr() in main()
  char letter[Max];         // not referenced by the code in this listing
  bool isfloat = false;     // set by Number() when the literal contains '.'
  bool isbool = false;      // set by identify() when the word is true/false/TRUE/FALSE
  bool notes = false;       // true between "/*" and "*/" (maintained by symbolstr())
  bool isvariable = false;  // set by identify() once the word contains '_'
  bool isiderror = false;   // set by Number() when a letter or '_' follows digits
  // Keyword tables. Entry i of key (spelling), keyNum (category code) and
  // keyword (tag printed in the result) describe the same keyword.
  std::string key[32]={"char","double","enum","float","int","long","short","signed",
      "struct","union","unsigned","void","for","do","while","break","continue",
      "if","else","goto","switch","case","default","return","auto","extern","register",
      "static","const","sizeof","typedef","volatile"};
  int keyNum[32]={1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32};
  // The comma after "REGISTER" matters: without it the literal is joined with
  // "STATIC" and every following tag moves up by one slot.
  std::string keyword[32]={"CHAR","DOUBLE","ENUM","FLOAT","INT","LONG","SHORT","SIGNED",
      "STRUCT","UNION","UNSIGNED","VOID","FOR","DO","WHILE","BREAK","CONTINUE",
      "IF","ELSE","GOTO","SWITCH","CASE","DEFAULT","RETURN","AUTO","EXTERN","REGISTER",
      "STATIC","CONST","SIZEOF","TYPEDEF","VOLATILE"};

  // Symbol tables, laid out the same way: spelling, category code, tag.
  // Code 65 (the double quote) is handled directly in isSymbol()/Symbolword(),
  // which is why symbolNum jumps from 64 to 66.
  std::string symbol[33]={"+","-","*","/","%","++","--",">","<","==",
      "!=",">=","<=","&&","||","!","=","+=","-=","*=","/=","%=",
      ",","(",")","[","]","{","}",";","/*","*/","'"};
  int symbolNum[33]={33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,66};
  std::string symbolword[33]={"PLUS","MINUS","MULTI","RDIV","MODULO","INC","DEC","GT","LT","EQ","NEQ",
      "GE","LE","AND","OR","NOT","ASSIGN","PLUS_A","MINUS_A","MUL_A","DIV_A","MOD_A","COMMA",
      "LR_BRAC","RR_BRAC","LS_BRAC","RS_BRAC","L_PRA","R_PRA","SEMIC","L_ANNO","R_ANNO","QMARK"};
  int num;     // index in data of the next character to scan; advanced by the token readers
  int length;  // number of characters in data, set in main()

  void Getword();                                  // scan data and print every token
  void error(FILE *fp,int type,std::string word);  // report a lexical error
  std::string identify(char s,int n);    // read a word that starts with a letter
  std::string Number(char s,int n);      // read a numeric literal
  std::string symbolstr(char s,int n);   // read a one- or two-character symbol
  std::string variable(char s,int n);    // read an identifier that starts with '_'
  std::string Keyword(int n);            // keyword category code -> tag
  std::string Symbolword(int n);         // symbol category code -> tag
  bool isNum(char s);                    // is s a decimal digit?
  bool isLetter(char s);                 // is s an ASCII letter?
  bool issymbol(char s);                 // is s neither a letter nor a digit?
  bool isBool(std::string s);            // boolean constants are not in the keyword table, hence a separate check
  int wordtype(char str);                // character class of str
  int iskeyword(std::string s);          // keyword category code, 0 if s is not a keyword
  int isSymbol(std::string s);           // symbol category code, SYMBOLERROR if unknown
  61. int main()
  62. {
  63. char filename[Max] = "input.txt";
  64. /*这里可以加个提示语句然后用scanf接收文件名给filename*/
  65. getfdstr(filename,data);
  66. //自己写的头文件里的东西,意思就是把读取文件将文件内空格去除放到data里
  67. length = strlen(data);
  68. Getword();
  69. getch();
  70. }
  71. //下面就慢慢看吧,有点长懒得解释
  72. void Getword()
  73. {
  74. FILE *fp;
  75. int key;
  76. int count = 0;
  77. fp = fopen("output.txt","w");
  78. if (fp == NULL)
  79. {
  80. printf("文件打开失败!\n");
  81. system("pause");
  82. exit(0);
  83. }
  84. for (num=0;num<length;)
  85. {
  86. char str;
  87. string word;
  88. str = data[num];
  89. key = wordtype(str);
  90. switch (key)
  91. {
  92. case UNDERLINE:
  93. word = variable(str,num);
  94. printf("%s (%s,70) 标识符\n",word.c_str(),word.c_str());
  95. fprintf(fp,"%s (%s,70) 标识符\n",word.c_str(),word.c_str());
  96. isvariable = false;
  97. break;
  98. case LETTER:
  99. word = identify(str,num);
  100. if(notes)
  101. break;
  102. else if(isvariable)
  103. {
  104. if (iskeyword(word))
  105. {
  106. printf("%s (%s,%d) 关键字\n",word.c_str(),Keyword(iskeyword(word)).c_str(),iskeyword(word));
  107. fprintf(fp,"%s (%s,%d) 关键字\n",word.c_str(),Keyword(iskeyword(word)).c_str(),iskeyword(word));
  108. }
  109. else
  110. {
  111. printf("%s (IDE,70) 标识符\n",word.c_str());
  112. fprintf(fp,"%s (IDE,70) 标识符\n",word.c_str());
  113. }
  114. break;
  115. }
  116. else
  117. {
  118. if(!word.compare("bool"))
  119. {
  120. printf("%s (BOOL,%d) 关键字\n",word.c_str(),67);
  121. fprintf(fp,"%s (BOOL,%d) 关键字\n",word.c_str(),67);
  122. }
  123. else if(isbool)
  124. {
  125. printf("%s (CONST_BOOL,%d) 布尔型\n",word.c_str(),67);
  126. fprintf(fp,"%s (CONST_BOOL,%d) 布尔型\n",word.c_str(),67);
  127. isbool = false;
  128. }
  129. else
  130. {
  131. printf("%s (%s,70) 标识符\n",word.c_str(),word.c_str());
  132. fprintf(fp,"%s (%s,70) 标识符\n",word.c_str(),word.c_str());
  133. isvariable = false;
  134. }
  135. break;
  136. }
  137. case NUMBER:
  138. word = Number(str,num);
  139. if(notes)
  140. break;
  141. else if(isiderror)
  142. {
  143. error(fp,IDENTIFIERERROR,word);
  144. break;
  145. }
  146. else
  147. {
  148. if (isfloat)
  149. {
  150. printf("%s (CONST _FLOAT,69) 浮点型\n",word.c_str());
  151. fprintf(fp,"%s (CONST _FLOAT,69) 浮点型\n",word.c_str());
  152. isfloat = false;
  153. }
  154. else
  155. {
  156. printf("%s (CONST _INT,68) 整型\n",word.c_str());
  157. fprintf(fp,"%s (CONST _INT,68) 整型\n",word.c_str());
  158. }
  159. break;
  160. }
  161. case SYMBOL:
  162. word = symbolstr(str,num);
  163. if(notes)
  164. {
  165. if(count == 0)
  166. {
  167. printf("%s (%s,%d) 界限符\n",word.c_str(),Symbolword(isSymbol(word)).c_str(),isSymbol(word));
  168. fprintf(fp,"%s (%s,%d) 界限符\n",word.c_str(),Symbolword(isSymbol(word)).c_str(),isSymbol(word));
  169. count++;
  170. }
  171. break;
  172. }
  173. else
  174. {
  175. if(isSymbol(word) == SYMBOLERROR)
  176. {
  177. error(fp,SYMBOLERROR,word);
  178. break;
  179. }
  180. if(!word.compare("+")||!word.compare("-")||!word.compare("*")||!word.compare("/")||!word.compare("%")||
  181. !word.compare("++")||!word.compare("--")||!word.compare(">")||!word.compare("<")||!word.compare("==")||
  182. !word.compare("!=")||!word.compare(">=")||!word.compare("<=")||!word.compare("&&")||!word.compare("||")||
  183. !word.compare("!")||!word.compare("=")||!word.compare("+=")||!word.compare("-=")||!word.compare("*=")||
  184. !word.compare("/=")||!word.compare("%="))
  185. {
  186. printf("%s (%s,%d) 运算符\n",word.c_str(),Symbolword(isSymbol(word)).c_str(),isSymbol(word));
  187. fprintf(fp,"%s (%s,%d) 运算符\n",word.c_str(),Symbolword(isSymbol(word)).c_str(),isSymbol(word));
  188. break;
  189. }
  190. if(!word.compare("*/"))
  191. {
  192. printf("—————内容被注释—————\n");
  193. fprintf(fp,"—————内容被注释—————\n");
  194. printf("%s (%s,%d) 界限符\n",word.c_str(),Symbolword(isSymbol(word)).c_str(),isSymbol(word));
  195. fprintf(fp,"%s (%s,%d) 界限符\n",word.c_str(),Symbolword(isSymbol(word)).c_str(),isSymbol(word));
  196. count = 0;
  197. break;
  198. }
  199. printf("%s (%s,%d) 界限符\n",word.c_str(),Symbolword(isSymbol(word)).c_str(),isSymbol(word));
  200. fprintf(fp,"%s (%s,%d) 界限符\n",word.c_str(),Symbolword(isSymbol(word)).c_str(),isSymbol(word));
  201. break;
  202. }
  203. }
  204. }
  205. fclose(fp);
  206. }
  207. int wordtype(char str)
  208. {
  209. if ((str <= 'z' && str >= 'a') || (str <='Z' && str >= 'A'))
  210. return LETTER;
  211. if (str <= '9' && str >= '0')
  212. return NUMBER;
  213. if (str == '_')
  214. return UNDERLINE;
  215. else
  216. return SYMBOL;
  217. }
  218. string identify(char s,int n)
  219. {
  220. int j = n+1;
  221. int flag = 1;
  222. string temp1(sizeof(s),s);
  223. while(flag)
  224. {
  225. if(!isvariable)
  226. {
  227. if (!isNum(data[j])&&isLetter(data[j])&&!issymbol(data[j]))
  228. {
  229. string temp2(sizeof(data[j]),data[j]);
  230. temp1.append(temp2);
  231. if (iskeyword(temp1))
  232. {
  233. j++;
  234. num = j;
  235. return temp1;
  236. }
  237. else if(isBool(temp1))
  238. {
  239. j++;
  240. num = j;
  241. isbool = true;
  242. return temp1;
  243. }
  244. else if(!temp1.compare("bool"))
  245. {
  246. j++;
  247. num = j;
  248. return temp1;
  249. }
  250. j++;
  251. }
  252. else if (data[j] == '_')
  253. {
  254. string temp2(sizeof(data[j]),data[j]);
  255. temp1.append(temp2);
  256. j++;
  257. num = j;
  258. isvariable = true;
  259. }
  260. else
  261. {
  262. flag = 0;
  263. }
  264. }
  265. else
  266. {
  267. if (((!isNum(data[j])||!isLetter(data[j]))&&!issymbol(data[j]))||data[j]=='_')
  268. {
  269. string temp2(sizeof(data[j]),data[j]);
  270. temp1.append(temp2);
  271. j++;
  272. num = j;
  273. }
  274. else
  275. {
  276. return temp1;
  277. }
  278. }
  279. }
  280. num = j;
  281. return temp1;
  282. }
  // Returns true when `s` is a decimal digit '0'..'9'.
  bool isNum(char s)
  {
      return s >= '0' && s <= '9';
  }
  // Returns true when `s` is an ASCII letter, upper or lower case.
  bool isLetter(char s)
  {
      return (s >= 'a' && s <= 'z') || (s >= 'A' && s <= 'Z');
  }
  297. int iskeyword(string s)
  298. {
  299. for (int i = 0; i < 32; i++)
  300. {
  301. if (s.compare(key[i])==0)
  302. {
  303. return keyNum[i];
  304. }
  305. }
  306. return 0;
  307. }
  308. string Number(char s,int n)
  309. {
  310. int j = n+1;
  311. int count = 0;
  312. int flag = 1;
  313. string temp1(sizeof(s),s);
  314. while (flag)
  315. {
  316. if(!isiderror)
  317. {
  318. if (isNum(data[j]))
  319. {
  320. string temp2(sizeof(data[j]),data[j]);
  321. temp1.append(temp2);
  322. j++;
  323. }
  324. else if(data[j]=='.'&&count==0)
  325. {
  326. string temp2(sizeof(data[j]),data[j]);
  327. temp1.append(temp2);
  328. j++;
  329. count++;
  330. isfloat=true;
  331. }
  332. else if((data[j]>='a'&&data[j]<='z')||(data[j]>='A'&&data[j]<='Z')||data[j]=='_')
  333. {
  334. string temp2(sizeof(data[j]),data[j]);
  335. temp1.append(temp2);
  336. j++;
  337. isiderror = true;
  338. }
  339. else
  340. {
  341. flag = 0;
  342. }
  343. }
  344. else
  345. {
  346. if (((!isNum(data[j])||!isLetter(data[j]))&&!issymbol(data[j]))||data[j]=='_')
  347. {
  348. string temp2(sizeof(data[j]),data[j]);
  349. temp1.append(temp2);
  350. j++;
  351. }
  352. else
  353. {
  354. flag = 0;
  355. }
  356. }
  357. }
  358. num = j;
  359. return temp1;
  360. }
  361. string symbolstr(char s,int n)
  362. {
  363. int j = n+1;
  364. string str(sizeof(data[j]),data[j]);
  365. string temp(sizeof(s),s);
  366. if(!temp.compare(">")||!temp.compare("<")||!temp.compare("=")||!temp.compare("+")||!temp.compare("-")||
  367. !temp.compare("!")|!temp.compare("*")||!temp.compare("/")||!temp.compare("%"))
  368. {
  369. if(!str.compare("="))
  370. {
  371. string temp2(sizeof(data[j]),data[j]);
  372. temp.append(temp2);
  373. j++;
  374. }
  375. }
  376. if(!temp.compare("+"))
  377. {
  378. if(!str.compare("+"))
  379. {
  380. string temp2(sizeof(data[j]),data[j]);
  381. temp.append(temp2);
  382. j++;
  383. }
  384. }
  385. if(!temp.compare("-"))
  386. {
  387. if(!str.compare("-"))
  388. {
  389. string temp2(sizeof(data[j]),data[j]);
  390. temp.append(temp2);
  391. j++;
  392. }
  393. }
  394. if(!temp.compare("&"))
  395. {
  396. if(!str.compare("&"))
  397. {
  398. string temp2(sizeof(data[j]),data[j]);
  399. temp.append(temp2);
  400. j++;
  401. }
  402. }
  403. if(!temp.compare("|"))
  404. {
  405. if(!str.compare("|"))
  406. {
  407. string temp2(sizeof(data[j]),data[j]);
  408. temp.append(temp2);
  409. j++;
  410. }
  411. }
  412. if(!temp.compare("/"))
  413. {
  414. if(!str.compare("*"))
  415. {
  416. string temp2(sizeof(data[j]),data[j]);
  417. temp.append(temp2);
  418. j++;
  419. notes = true;
  420. }
  421. }
  422. if(!temp.compare("*"))
  423. {
  424. if(!str.compare("/"))
  425. {
  426. string temp2(sizeof(data[j]),data[j]);
  427. temp.append(temp2);
  428. j++;
  429. notes = false;
  430. }
  431. }
  432. num=j;
  433. return temp;
  434. }
  435. int isSymbol(string s)
  436. {
  437. string temp(1,'"');
  438. if (s.compare(temp) == 0)
  439. return 65;
  440. for (int i = 0;i < 33 ;i++)
  441. {
  442. if (s.compare(symbol[i])==0)
  443. return symbolNum[i];
  444. }
  445. return SYMBOLERROR;
  446. }
  // Returns true for every character that is neither an ASCII letter nor a
  // decimal digit. Note that '_' and the terminating '\0' count as symbols.
  bool issymbol(char s)
  {
      const bool letter = (s >= 'a' && s <= 'z') || (s >= 'A' && s <= 'Z');
      const bool digit = (s >= '0' && s <= '9');
      return !(letter || digit);
  }
  454. string Keyword(int n)
  455. {
  456. if (n>=0&&n<=32)
  457. return keyword[n-1];
  458. }
  459. string Symbolword(int n)
  460. {
  461. string result(1,'"');
  462. if(n == 65)
  463. return result;
  464. if(n>=33&&n<65)
  465. return symbolword[n-33];
  466. if(n == 66)
  467. return symbolword[33];
  468. }
  // Returns true when `s` is one of the boolean constants
  // "true", "false", "TRUE" or "FALSE" (no other spellings are accepted).
  bool isBool(std::string s)
  {
      return s == "true" || s == "false" || s == "TRUE" || s == "FALSE";
  }
  475. string variable(char s,int n)
  476. {
  477. int j = n+1;
  478. int flag = 1;
  479. string temp1(sizeof(s),s);
  480. while(flag)
  481. {
  482. if (((!isNum(data[j])||!isLetter(data[j]))&&!issymbol(data[j]))||data[j]=='_')
  483. {
  484. string temp2(sizeof(data[j]),data[j]);
  485. temp1.append(temp2);
  486. j++;
  487. }
  488. else
  489. {
  490. flag = 0;
  491. }
  492. }
  493. num = j;
  494. return temp1;
  495. }
  496. void error(FILE *fp,int type,string word)
  497. {
  498. switch(type)
  499. {
  500. case SYMBOLERROR:
  501. printf("ERROR! ERRORTYPE:SymbolError! %s\n",word.c_str());
  502. fprintf(fp,"ERROR! ERRORTYPE:SymbolError! %s\n",word.c_str());
  503. break;
  504. case IDENTIFIERERROR:
  505. printf("ERROR! ERRORTYPE:IdentifierError! %s\n",word.c_str());
  506. fprintf(fp,"ERROR! ERRORTYPE:IdentifierError! %s\n",word.c_str());
  507. break;
  508. }
  509. }

5.结果截图

输入:

 控制台输出:

文件输出:

 

 

到这功能就差不多完成了,C语言我也没学多久,所以可能有些地方有疏漏,希望大家多多指正。

 

参考文章:

(13条消息) 词法分析器(分析C语言)_flamingobaby的博客-CSDN博客_词法分析c语言

声明:本文内容由网友自发贡献,不代表【wpsshop博客】立场,版权归原作者所有,本站不承担相应法律责任。如您发现有侵权的内容,请联系我们。转载请注明出处:https://www.wpsshop.cn/w/小小林熬夜学编程/article/detail/294243
推荐阅读
相关标签
  

闽ICP备14008679号