当前位置:   article > 正文

编程实战:类C语法的编译型脚本解释器(二)Token和变量_c--语言token类型

c--语言token类型

初级代码游戏的专栏介绍与文章目录-CSDN博客

我的github:codetoys,所有代码都将会位于ctfc库中。已经放入库中我会指出在库中的位置。

这些代码大部分以Linux为目标但部分代码是纯C++的,可以在任何平台上使用。


系列入口:编程实战:类C语法的编译型脚本解释器(系列)-CSDN博客

        现在开始解释所有的设计思想和源代码。先从外围入手,最后会进入到一个巨大的解析语法的类。

        本文介绍TOKEN和变量。

目录

一、TOKEN

1.1 定义Token类型

1.2 将脚本拆解为Token

1.3 TryGetKeyword识别关键字

1.4 TryGetNumber识别数值

1.5 其余TryGetXXXX略 

二、变量


一、TOKEN

1.1 定义Token类型

        token是编程语言的基本单元,是最小单位,包括分隔符、标识符、操作符、关键字、数字、字符串字面值,不包括空白。在C和类C语法中,空白字符包括换行都会被忽略(但预处理程序并非如此,所以预处理是额外的东西)。

        所有编译程序首先都会把源代码分解成一系列token,本代码也是如此。token的相关定义如下:

  // Row width of the predefined keyword/operator tables only; other identifiers may be any length.
  enum { TOKEN_BUF_LEN = 128 };

  // A lexical token: one element of the script once whitespace has been removed.
  struct Token
  {
      enum types { DELIMITER = 0, OPERATOR, IDENTIFIER, NUMBER, KEYWORD, STRING };

      types type;       // token category
      std::string text; // token text
      size_t pos;       // position in the source code

      Token(types _type, char const* _text, size_t _pos) : type(_type), text(_text), pos(_pos) {}

      // Formats the token as "<pos, zero-padded to 3> <type name, padded to 12> <text>".
      std::string ToString() const
      {
          // Must stay in the same order as `types`.
          static char const* const typestr[] = { "DELIMITER", "OPERATOR", "IDENTIFIER", "NUMBER", "KEYWORD", "STRING" };
          size_t const count = sizeof(typestr) / sizeof(typestr[0]);
          size_t const index = static_cast<size_t>(type);
          // An out-of-range enum value must not index past the table.
          char const* const name = index < count ? typestr[index] : "UNKNOWN";

          char buf[TOKEN_BUF_LEN * 2];
          // pos is a size_t: print it through an explicitly matching unsigned type,
          // and bound the write to the buffer.
          snprintf(buf, sizeof(buf), "%03lu %-12s ", static_cast<unsigned long>(pos), name);

          std::string ret = buf;
          ret += text.c_str();
          return ret;
      }
  };

        每个Token包含类型和文本(对应源代码中的表现形式),同时为了调试需要,增加了pos记录在脚本中的位置。

1.2 将脚本拆解为Token

        作为编译的第一步,显然是将源代码分解为token。

        这一步由一个类来实现:

  1. class CTokens
  2. {
  3. public:
  4. vector<Token > m_tokens;//解析出的语法元素
  5. bool ToTokens(string& source)
  6. {
  7. string::size_type pos = 0;
  8. while (GetToken(source, pos));
  9. return true;
  10. }
  11. };

        解析出的token保存在m_tokens中,而函数ToTokens()仅仅是循环调用GetToken()解析出一个一个token而已。 

        GetToken()是关键的主控函数:

  1. bool GetToken(string& source, string::size_type& pos)
  2. {
  3. Token::types type;
  4. string token;
  5. char c;
  6. bool isInComment = false;
  7. while (pos < source.size())
  8. {
  9. c = source[pos];
  10. if (isInComment)
  11. {
  12. if ('\n' == c)
  13. {
  14. isInComment = false;
  15. }
  16. ++pos;
  17. continue;
  18. }
  19. if ('/' == c && pos + 1 < source.size() && '/' == source[pos + 1])
  20. {
  21. isInComment = true;
  22. pos += 2;
  23. continue;
  24. }
  25. if (!IsBlank(c))break;
  26. ++pos;
  27. }
  28. if (source.size() == pos)return false;
  29. if (TryGetKeyword(source.c_str(), pos, token))
  30. {
  31. type = Token::KEYWORD;
  32. }
  33. else if (TryGetNumber(source.c_str(), pos, token))
  34. {
  35. type = Token::NUMBER;
  36. }
  37. else if (TryGetString(source.c_str(), pos, token))
  38. {
  39. type = Token::STRING;
  40. }
  41. else if (TryGetDelimiter(source.c_str(), pos, token))
  42. {
  43. type = Token::DELIMITER;
  44. }
  45. else if (TryGetOperator(source.c_str(), pos, token))
  46. {
  47. type = Token::OPERATOR;
  48. }
  49. else if (TryGetIdentifier(source.c_str(), pos, token))
  50. {
  51. type = Token::IDENTIFIER;
  52. }
  53. else
  54. {
  55. CmyException::Throw(__FILE__, __LINE__, source.c_str(), pos, "无法识别的符号");
  56. return false;
  57. }
  58. m_tokens.push_back(Token(type, token.c_str(), pos - token.size()));
  59. return true;
  60. }

        这个函数的流程不复杂,先跳过空白和注释(仅支持单行注释),然后依次尝试每种token,每种尝试如果成功会修改当前位置pos(通过引用参数),如果失败则不会修改pos;如果所有尝试都失败,则抛出“无法识别的符号”异常。

        TryGetXXXX这一组函数每个都不复杂,不过调用顺序有名堂,关键字是最优先的,这就保证关键字不可能被用作变量名。

1.3 TryGetKeyword识别关键字

        这个复杂一些,由几个函数组合而成:

  1. //headset最后一个必须是空串
  2. bool IsStartWith(char const* str, char const (*headset)[TOKEN_BUF_LEN], string& ret)const
  3. {
  4. long i = 0;
  5. ret = "";
  6. while (headset[i][0] != '\0')
  7. {
  8. size_t keylen = strlen(headset[i]);
  9. if (0 == strncmp(headset[i], str, keylen))
  10. {
  11. if (ret.size() < strlen(headset[i]))ret = headset[i];
  12. }
  13. ++i;
  14. }
  15. return ret.size() != 0;
  16. }
  17. bool IsKeyword(char const* str, string& key)const
  18. {
  19. STATIC_C char const buf[][TOKEN_BUF_LEN] = {
  20. "asm","default","float","operator","static_cast","union",
  21. "auto","delete","for","private","struct","unsigned",
  22. "bool","do","friend","protected","switch","using",
  23. "break","double","goto","public","template","virtual",
  24. "case","dynamic_cast","if","register","this","void",
  25. "catch","else","inline","reinterpret_cast","throw","volatile",
  26. "char","enum","int","return","true","wchar_t",
  27. "class","explicit","long","short","try","while",
  28. "const","export","mutable","signed","typedef",
  29. "const_cast","extern","namespace","sizeof","typeid",
  30. "continue","false","new","static","typename","string",
  31. ""
  32. };//必须以空串结尾
  33. return IsStartWith(str, buf, key);
  34. }
  35. bool TryGetKeyword(char const* source, string::size_type& pos, string& ret)
  36. {
  37. string key;
  38. string nextkey;
  39. size_t keylen;
  40. if (IsKeyword(source + pos, key))
  41. {
  42. keylen = key.size();
  43. if ('\0' == source[pos + keylen] || IsBlank(source[pos + keylen]) || IsDelimiter(source[pos + keylen]) || IsOperator(source + pos + keylen, nextkey))
  44. {
  45. ret = key;
  46. pos += keylen;
  47. return true;
  48. }
  49. }
  50. return false;
  51. }

         规则其实也很简单:以关键字开头并且其后是{源代码结尾、空白、分隔符、操作符}之一,则为一个关键字。

1.4 TryGetNumber识别数值

        数值是由数字或点开始的字母数字小数点的串,同时还需要符合一些规则,代码里分两步进行,第一步识别出串,第二步则将串根据各种规则转换为数值:

  // Tries to read a numeric literal at source + pos.
  //
  // The literal must start with a digit, or with '.' immediately followed by a
  // digit. It then extends over letters, digits, '.' and '_'. In addition, a
  // '+' or '-' is accepted when it directly follows 'e'/'E', is followed by a
  // digit, and the literal has no 0x/0X prefix, so that "1e-5" is read as one
  // token while "0x1e-5" is still read as "0x1e".
  // Only the extent of the literal is determined here; no validation is done.
  //
  // source   NUL-terminated script text
  // pos      in/out: advanced past the literal on success, untouched on failure
  // ret      out: the literal text, or "" on failure
  // returns  true when a literal was read
  bool TryGetNumber(char const* source, std::string::size_type& pos, std::string& ret)
  {
      ret = "";
      char const first = source[pos];
      bool const startsWithDigit = first >= '0' && first <= '9';
      // first != '\0' whenever source[pos + 1] is evaluated, so that read stays inside the string.
      bool const startsWithFraction = '.' == first && source[pos + 1] >= '0' && source[pos + 1] <= '9';
      if (!startsWithDigit && !startsWithFraction) return false;

      // In a hex literal 'e'/'E' is a digit, not an exponent marker.
      bool const isHex = '0' == first && ('x' == source[pos + 1] || 'X' == source[pos + 1]);

      char c;
      while ((c = source[pos]) != '\0')
      {
          bool accepted = (c >= '0' && c <= '9')
              || (c >= 'a' && c <= 'z')
              || (c >= 'A' && c <= 'Z')
              || '.' == c
              || '_' == c;
          if (!accepted && ('+' == c || '-' == c) && !isHex && !ret.empty())
          {
              // Exponent sign: only directly after e/E and directly before a digit.
              char const prev = ret[ret.size() - 1];
              char const next = source[pos + 1];
              accepted = ('e' == prev || 'E' == prev) && next >= '0' && next <= '9';
          }
          if (!accepted) break;
          ret += c;
          ++pos;
      }
      return ret.size() != 0;
  }
  23. bool NumberToVariable(char const* source, Variable& var)
  24. {
  25. char* endptr;
  26. if (IsCharIn('.', source) || IsCharIn('e', source) || IsCharIn('E', source))
  27. {
  28. var.type = Variable::DOUBLE;
  29. var.dValue = strtod(source, &endptr);
  30. }
  31. else
  32. {
  33. var.type = Variable::LONG;
  34. long prefix = 0;
  35. long radix = 10;
  36. if (strlen(source) >= 1 && '0' == source[0])
  37. {
  38. if (strlen(source) >= 2 && ('x' == source[1] || 'X' == source[1]))
  39. {
  40. radix = 16;
  41. prefix = 2;
  42. }
  43. else
  44. {
  45. radix = 8;
  46. prefix = 1;
  47. }
  48. }
  49. var.lValue = strtol(source + prefix, &endptr, radix);
  50. }
  51. if (strlen(endptr) != 0)
  52. {
  53. if (Variable::DOUBLE == var.type && (0 == stricmp(endptr, "f") || 0 == stricmp(endptr, "l"))
  54. || Variable::LONG == var.type && (0 == stricmp(endptr, "u") || 0 == stricmp(endptr, "l") || 0 == stricmp(endptr, "i64")))
  55. {
  56. return true;
  57. }
  58. string str;
  59. str = "数值常量格式错误 ";
  60. str += endptr;
  61. CException::Throw(__FILE__, __LINE__, source, endptr - source, str.c_str());
  62. return false;
  63. }
  64. return true;
  65. }

1.5 其余TryGetXXXX略 

二、变量

        我非常痛恨无类型变量,比如JavaScript,所以我在这个脚本里面使用强类型。

        变量类型做了简化,分为long、double和string。通过一个类存储所有的变量,代码如下:

  // A typed script value.
  //
  // A Variable is NULLVARIABLE until its first assignment, which fixes its type.
  // Later assignments convert the incoming value to that type instead of changing
  // it. All three value members are always present; `type` selects the one in use.
  struct Variable
  {
      enum types { NULLVARIABLE = 0, LONG, DOUBLE, STRING };

      types type;           // which value member is in use
      bool isconst;         // flag only; none of the members below enforce it
      long lValue;          // value when type == LONG
      double dValue;        // value when type == DOUBLE
      std::string strValue; // value when type == STRING

      Variable() : type(NULLVARIABLE), isconst(false), lValue(0), dValue(0.) {}

      bool isNull() const { return type == NULLVARIABLE; }
      bool isNumber() const { return type == LONG || type == DOUBLE; }
      bool isString() const { return type == STRING; }

      // Resets type, constness and all values.
      void clear()
      {
          type = NULLVARIABLE;
          isconst = false;
          initvalue();
      }

      // Resets the values only; type and isconst are kept.
      void initvalue()
      {
          lValue = 0;
          dValue = 0;
          strValue = "";
      }

      // Assigns a long; a NULLVARIABLE target becomes LONG, otherwise v is converted.
      Variable& operator = (long v)
      {
          if (NULLVARIABLE == type) type = LONG;
          switch (type)
          {
          case LONG: lValue = v; break;
          case DOUBLE: dValue = v; break;
          case STRING: strValue = FormatLong(v); break;
          default: break;
          }
          return *this;
      }

      // Assigns a double; a NULLVARIABLE target becomes DOUBLE, otherwise v is converted.
      Variable& operator = (double v)
      {
          if (NULLVARIABLE == type) type = DOUBLE;
          switch (type)
          {
          case LONG: lValue = static_cast<long>(v); break;
          case DOUBLE: dValue = v; break;
          case STRING: strValue = FormatDouble(v); break;
          default: break;
          }
          return *this;
      }

      // Assigns a string; a NULLVARIABLE target becomes STRING, otherwise v is
      // parsed with atol/atof.
      Variable& operator = (std::string const& v)
      {
          if (NULLVARIABLE == type) type = STRING;
          switch (type)
          {
          case LONG: lValue = atol(v.c_str()); break;
          case DOUBLE: dValue = atof(v.c_str()); break;
          case STRING: strValue = v; break;
          default: break;
          }
          return *this;
      }

      // Assigns the value of v. A NULLVARIABLE target adopts v's type; otherwise
      // v is converted to the target's type. isconst is not copied.
      Variable& operator = (Variable const& v)
      {
          if (NULLVARIABLE == type) type = v.type;
          switch (type)
          {
          case LONG: lValue = v.GetLong(); break;
          case DOUBLE: dValue = v.GetDouble(); break;
          case STRING: strValue = v.GetString(); break;
          default: break;
          }
          return *this;
      }

      // Unary minus; only LONG and DOUBLE are negated, other types are copied unchanged.
      Variable operator-()const
      {
          Variable tmp = *this;
          switch (type)
          {
          case LONG: tmp.lValue = -lValue; break;
          case DOUBLE: tmp.dValue = -dValue; break;
          default: break;
          }
          return tmp;
      }

      // Result type of combining operand types a and b.
      // Equal types give that type. LONG mixed with DOUBLE gives DOUBLE, except
      // that with eva == true (promotion for assignment) the left type a wins.
      // Every other combination gives NULLVARIABLE.
      static types typeUpgrade(types a, types b, bool eva = false)
      {
          if (NULLVARIABLE == a || NULLVARIABLE == b) return NULLVARIABLE;
          if (a == b) return a;
          if (DOUBLE == a && LONG == b) return DOUBLE;
          if (LONG == a && DOUBLE == b) return eva ? LONG : DOUBLE;
          return NULLVARIABLE;
      }

      // Value converted to long (0 for NULLVARIABLE).
      long GetLong()const
      {
          switch (type)
          {
          case LONG: return lValue;
          case DOUBLE: return static_cast<long>(dValue);
          case STRING: return atol(strValue.c_str());
          default: return 0;
          }
      }

      // Value converted to double (0 for NULLVARIABLE).
      double GetDouble()const
      {
          switch (type)
          {
          case LONG: return lValue;
          case DOUBLE: return dValue;
          case STRING: return atof(strValue.c_str());
          default: return 0.;
          }
      }

      // True for a non-zero LONG or DOUBLE; always false for STRING and NULLVARIABLE.
      bool GetBool()const
      {
          switch (type)
          {
          case LONG: return 0 != lValue;
          case DOUBLE: return 0 != dValue;
          default: return false;
          }
      }

      // Value converted to text ("" for NULLVARIABLE).
      std::string GetString()const
      {
          switch (type)
          {
          case LONG: return FormatLong(lValue);
          case DOUBLE: return FormatDouble(dValue);
          case STRING: return strValue;
          default: return "";
          }
      }

      // Arithmetic operators: the result starts as a copy of *this, its type is
      // typeUpgrade(type, b.type), and only the member matching that type is
      // recomputed. Combinations an operator does not handle leave the copied
      // values untouched.
      Variable operator+(Variable const& b)const
      {
          Variable tmp = *this;
          tmp.type = typeUpgrade(type, b.type);
          switch (tmp.type)
          {
          case LONG: tmp.lValue = GetLong() + b.GetLong(); break;
          case DOUBLE: tmp.dValue = GetDouble() + b.GetDouble(); break;
          case STRING: tmp.strValue = GetString() + b.GetString(); break; // concatenation
          default: break;
          }
          return tmp;
      }
      Variable operator-(Variable const& b)const
      {
          Variable tmp = *this;
          tmp.type = typeUpgrade(type, b.type);
          switch (tmp.type)
          {
          case LONG: tmp.lValue = GetLong() - b.GetLong(); break;
          case DOUBLE: tmp.dValue = GetDouble() - b.GetDouble(); break;
          default: break;
          }
          return tmp;
      }
      Variable operator*(Variable const& b)const
      {
          Variable tmp = *this;
          tmp.type = typeUpgrade(type, b.type);
          switch (tmp.type)
          {
          case LONG: tmp.lValue = GetLong() * b.GetLong(); break;
          case DOUBLE: tmp.dValue = GetDouble() * b.GetDouble(); break;
          default: break;
          }
          return tmp;
      }
      // Throws the string literal "div zero" (a char const*) when the divisor is zero.
      Variable operator/(Variable const& b)const
      {
          Variable tmp = *this;
          tmp.type = typeUpgrade(type, b.type);
          switch (tmp.type)
          {
          case LONG:
              if (0 == b.GetLong()) throw "div zero";
              tmp.lValue = GetLong() / b.GetLong();
              break;
          case DOUBLE:
              if (0 == b.GetDouble()) throw "div zero";
              tmp.dValue = GetDouble() / b.GetDouble();
              break;
          default: break;
          }
          return tmp;
      }
      // Defined for LONG operands only; throws the string literal "mod zero" when
      // the divisor is zero.
      Variable operator%(Variable const& b)const
      {
          Variable tmp = *this;
          tmp.type = typeUpgrade(type, b.type);
          switch (tmp.type)
          {
          case LONG:
              if (0 == b.GetLong()) throw "mod zero";
              tmp.lValue = GetLong() % b.GetLong();
              break;
          default: break;
          }
          return tmp;
      }

      // Comparison and logical operators: the result is a copy of *this retyped
      // as LONG whose lValue holds 1 or 0. For operand combinations the operator
      // does not handle, lValue keeps the value copied from *this.
      Variable operator>(Variable const& b)const
      {
          Variable tmp = *this;
          tmp.type = LONG;
          switch (typeUpgrade(type, b.type))
          {
          case LONG: tmp.lValue = GetLong() > b.GetLong(); break;
          case DOUBLE: tmp.lValue = GetDouble() > b.GetDouble(); break;
          case STRING: tmp.lValue = GetString() > b.GetString(); break;
          default: break;
          }
          return tmp;
      }
      Variable operator<(Variable const& b)const
      {
          Variable tmp = *this;
          tmp.type = LONG;
          switch (typeUpgrade(type, b.type))
          {
          case LONG: tmp.lValue = GetLong() < b.GetLong(); break;
          case DOUBLE: tmp.lValue = GetDouble() < b.GetDouble(); break;
          case STRING: tmp.lValue = GetString() < b.GetString(); break;
          default: break;
          }
          return tmp;
      }
      Variable operator>=(Variable const& b)const
      {
          Variable tmp = *this;
          tmp.type = LONG;
          switch (typeUpgrade(type, b.type))
          {
          case LONG: tmp.lValue = GetLong() >= b.GetLong(); break;
          case DOUBLE: tmp.lValue = GetDouble() >= b.GetDouble(); break;
          case STRING: tmp.lValue = GetString() >= b.GetString(); break;
          default: break;
          }
          return tmp;
      }
      Variable operator<=(Variable const& b)const
      {
          Variable tmp = *this;
          tmp.type = LONG;
          switch (typeUpgrade(type, b.type))
          {
          case LONG: tmp.lValue = GetLong() <= b.GetLong(); break;
          case DOUBLE: tmp.lValue = GetDouble() <= b.GetDouble(); break;
          case STRING: tmp.lValue = GetString() <= b.GetString(); break;
          default: break;
          }
          return tmp;
      }
      Variable operator==(Variable const& b)const
      {
          Variable tmp = *this;
          tmp.type = LONG;
          switch (typeUpgrade(type, b.type))
          {
          case LONG: tmp.lValue = GetLong() == b.GetLong(); break;
          case DOUBLE: tmp.lValue = GetDouble() == b.GetDouble(); break;
          case STRING: tmp.lValue = GetString() == b.GetString(); break;
          default: break;
          }
          return tmp;
      }
      Variable operator!=(Variable const& b)const
      {
          Variable tmp = *this;
          tmp.type = LONG;
          switch (typeUpgrade(type, b.type))
          {
          case LONG: tmp.lValue = GetLong() != b.GetLong(); break;
          case DOUBLE: tmp.lValue = GetDouble() != b.GetDouble(); break;
          case STRING: tmp.lValue = GetString() != b.GetString(); break;
          default: break;
          }
          return tmp;
      }
      // Being an overloaded operator, this evaluates both operands (no short-circuit).
      Variable operator&&(Variable const& b)const
      {
          Variable tmp = *this;
          tmp.type = LONG;
          switch (typeUpgrade(type, b.type))
          {
          case LONG: tmp.lValue = GetLong() && b.GetLong(); break;
          case DOUBLE: tmp.lValue = GetDouble() && b.GetDouble(); break;
          default: break;
          }
          return tmp;
      }
      // Being an overloaded operator, this evaluates both operands (no short-circuit).
      Variable operator||(Variable const& b)const
      {
          Variable tmp = *this;
          tmp.type = LONG;
          switch (typeUpgrade(type, b.type))
          {
          case LONG: tmp.lValue = GetLong() || b.GetLong(); break;
          case DOUBLE: tmp.lValue = GetDouble() || b.GetDouble(); break;
          default: break;
          }
          return tmp;
      }

      // Name of a type. For an out-of-range value the message is written into a
      // function-local static buffer, which the next such call overwrites.
      static char const* TypeStr(types type)
      {
          // Must stay in the same order as `types`.
          static char const* const typestr[] = { "NULLVARIABLE", "LONG", "DOUBLE", "STRING" };
          int const index = static_cast<int>(type);
          if (index >= 0 && index < 4) return typestr[index];

          static char buf[256];
          snprintf(buf, sizeof(buf), "错误的类型 %d", index);
          return buf;
      }

      // Describes the variable (constness, type, value) for debugging output,
      // indented by level * 4 spaces plus one space.
      std::string ToString(long level = 0)const
      {
          // Build the value as a std::string: copying a STRING value into a fixed
          // char buffer overflows it for long strings.
          std::string value;
          switch (type)
          {
          case LONG:
          case DOUBLE:
          case STRING:
              value = GetString();
              break;
          default:
              value = "NULL";
              break;
          }

          std::string ret;
          // A negative level would convert to a huge size_t and make assign() throw.
          if (level > 0) ret.assign(static_cast<std::string::size_type>(level) * 4, ' ');
          ret += " ";
          ret += (isconst ? "常量" : "变量");
          ret += "类型 ";
          ret += TypeStr(type);
          ret += " : ";
          ret += value;
          return ret;
      }

  private:
      // Decimal text of a long.
      static std::string FormatLong(long v)
      {
          char buf[64];
          snprintf(buf, sizeof(buf), "%ld", v);
          return buf;
      }

      // Text of a double with up to 17 significant digits, "%g" style.
      // NOTE(review): this replaces the non-standard gcvt(v, 200, buf). "%.17g" is
      // intended to give the same text as glibc's gcvt, which is understood to cap
      // the digit count at 17 - confirm on other C libraries.
      static std::string FormatDouble(double v)
      {
          char buf[64];
          snprintf(buf, sizeof(buf), "%.17g", v);
          return buf;
      }
  };

        没有使用union,直接用类型和三个变量来存储,空间当然是有浪费的,但是没人知道啊。

        重载了各种类型的相互操作,都很简单,只是繁琐。

        成员变量:

| 类型 | 变量名 | 功能 |
| --- | --- | --- |
| enum types | type | 实际存储的类型 |
| bool | isconst | 是否是常量,常量不允许修改 |
| long | lValue | type为LONG时使用 |
| double | dValue | type为DOUBLE时使用 |
| string | strValue | type为STRING时使用 |

        这个类没有太多需要解释的,只是作为一个基础数据结构存在。


(这里是本文结束,但不是整个系列的结束)

声明:本文内容由网友自发贡献,不代表【wpsshop博客】立场,版权归原作者所有,本站不承担相应法律责任。如您发现有侵权的内容,请联系我们。转载请注明出处:https://www.wpsshop.cn/w/不正经/article/detail/640729
推荐阅读
相关标签
  

闽ICP备14008679号