赞
踩
我的github:codetoys,所有代码都将会位于ctfc库中。已经放入库中我会指出在库中的位置。
这些代码大部分以Linux为目标但部分代码是纯C++的,可以在任何平台上使用。
系列入口:编程实战:类C语法的编译型脚本解释器(系列)-CSDN博客
现在开始解释所有的设计思想与源代码。先从外围入手,最后会进入到一个巨大的解析语法的类。
本文介绍TOKEN和变量。
目录
token是编程语言的基本单元,是最小单位,包括分隔符、标识符、操作符、关键字、数字、字符串字面值,不包括空白。在C和类C语法中,空白字符包括换行都会被忽略(但预处理程序并非如此,所以预处理是额外的东西)。
所有编译程序首先都会把源代码分解成一系列token,本代码也是如此。token的相关定义如下:
enum { TOKEN_BUF_LEN = 128 };// Row size of the predefined keyword/operator tables only; other identifiers may be any length
// A lexical token: one element of the source once whitespace has been removed
struct Token
{
    enum types { DELIMITER = 0, OPERATOR, IDENTIFIER, NUMBER, KEYWORD, STRING };

    types type;// token category
    std::string text;// token text
    size_t pos;// position in the source code

    Token(types _type, char const* _text, size_t _pos) :type(_type), text(_text), pos(_pos) {}

    // Returns the printable name of a token category.
    // A value outside the enum yields "UNKNOWN" instead of reading past the end of a table.
    static char const* TypeName(types t)
    {
        switch (t)
        {
        case DELIMITER: return "DELIMITER";
        case OPERATOR: return "OPERATOR";
        case IDENTIFIER: return "IDENTIFIER";
        case NUMBER: return "NUMBER";
        case KEYWORD: return "KEYWORD";
        case STRING: return "STRING";
        }
        return "UNKNOWN";
    }

    // Formats the token as "<pos, zero-padded to 3> <type name, left-aligned in 12> <text>".
    std::string ToString()const
    {
        char buf[TOKEN_BUF_LEN * 2];
        // pos is a size_t: cast it to match the %lu specifier (the old %ld expected a long)
        snprintf(buf, sizeof(buf), "%03lu %-12s ", (unsigned long)pos, TypeName(type));
        std::string ret = buf;
        ret += text.c_str();
        return ret;
    }
};
每个Token包含类型和文本(对应源代码中的表现形式),同时为了调试需要,增加了pos记录在脚本中的位置。
作为编译的第一步,显然是将源代码分解为token。
这一步由一个类来实现:
- class CTokens
- {
- public:
- vector<Token > m_tokens;//解析出的语法元素
- bool ToTokens(string& source)
- {
- string::size_type pos = 0;
- while (GetToken(source, pos));
- return true;
- }
- };
解析出的token保存在m_tokens中,而函数ToTokens()仅仅是循环调用GetToken()解析出一个一个token而已。
GetToken()是关键的主控函数:
- bool GetToken(string& source, string::size_type& pos)
- {
- Token::types type;
- string token;
- char c;
- bool isInComment = false;
- while (pos < source.size())
- {
- c = source[pos];
- if (isInComment)
- {
- if ('\n' == c)
- {
- isInComment = false;
- }
- ++pos;
- continue;
- }
- if ('/' == c && pos + 1 < source.size() && '/' == source[pos + 1])
- {
- isInComment = true;
- pos += 2;
- continue;
- }
- if (!IsBlank(c))break;
- ++pos;
- }
- if (source.size() == pos)return false;
- if (TryGetKeyword(source.c_str(), pos, token))
- {
- type = Token::KEYWORD;
- }
- else if (TryGetNumber(source.c_str(), pos, token))
- {
- type = Token::NUMBER;
- }
- else if (TryGetString(source.c_str(), pos, token))
- {
- type = Token::STRING;
- }
- else if (TryGetDelimiter(source.c_str(), pos, token))
- {
- type = Token::DELIMITER;
- }
- else if (TryGetOperator(source.c_str(), pos, token))
- {
- type = Token::OPERATOR;
- }
- else if (TryGetIdentifier(source.c_str(), pos, token))
- {
- type = Token::IDENTIFIER;
- }
- else
- {
- CmyException::Throw(__FILE__, __LINE__, source.c_str(), pos, "无法识别的符号");
- return false;
- }
- m_tokens.push_back(Token(type, token.c_str(), pos - token.size()));
- return true;
- }
这个函数的流程不复杂,先跳过空白和注释(仅支持单行注释),然后依次尝试每种token,每种尝试如果成功会修改当前位置pos(通过引用参数),如果失败则不会修改pos。
TryGetXXXX这一组函数每个都不复杂,不过调用顺序有名堂,关键字是最优先的,这就保证关键字不可能被用作变量名。
关键字的识别复杂一些,由几个函数组合而成:
- //headset最后一个必须是空串
- bool IsStartWith(char const* str, char const (*headset)[TOKEN_BUF_LEN], string& ret)const
- {
- long i = 0;
- ret = "";
- while (headset[i][0] != '\0')
- {
- size_t keylen = strlen(headset[i]);
- if (0 == strncmp(headset[i], str, keylen))
- {
- if (ret.size() < strlen(headset[i]))ret = headset[i];
- }
- ++i;
- }
- return ret.size() != 0;
- }
- bool IsKeyword(char const* str, string& key)const
- {
- STATIC_C char const buf[][TOKEN_BUF_LEN] = {
- "asm","default","float","operator","static_cast","union",
- "auto","delete","for","private","struct","unsigned",
- "bool","do","friend","protected","switch","using",
- "break","double","goto","public","template","virtual",
- "case","dynamic_cast","if","register","this","void",
- "catch","else","inline","reinterpret_cast","throw","volatile",
- "char","enum","int","return","true","wchar_t",
- "class","explicit","long","short","try","while",
- "const","export","mutable","signed","typedef",
- "const_cast","extern","namespace","sizeof","typeid",
- "continue","false","new","static","typename","string",
- ""
- };//必须以空串结尾
- return IsStartWith(str, buf, key);
- }
- bool TryGetKeyword(char const* source, string::size_type& pos, string& ret)
- {
- string key;
- string nextkey;
- size_t keylen;
- if (IsKeyword(source + pos, key))
- {
- keylen = key.size();
- if ('\0' == source[pos + keylen] || IsBlank(source[pos + keylen]) || IsDelimiter(source[pos + keylen]) || IsOperator(source + pos + keylen, nextkey))
- {
- ret = key;
- pos += keylen;
- return true;
- }
- }
- return false;
- }
规则其实也很简单:以关键字开头并且其后是{空白、分隔符、操作符}或者源代码结束,则为一个关键字。
数值是由数字或点开始的字母数字小数点的串,同时还需要符合一些规则,代码里分两步进行,第一步识别出串,第二步则将串根据各种规则转换为数值:
// Tries to read a numeric literal at source[pos].
// A literal starts with a digit, or with '.' followed by a digit, and continues
// over letters, digits, '.' and '_'. Outside hexadecimal literals (0x / 0X prefix)
// an 'e' or 'E' may also be followed by '+' or '-' when a digit comes next, so a
// signed exponent such as 1e-5 stays in one token.
// Only the extent of the literal is determined here; no validation is done.
// source: NUL-terminated script text.
// pos: in/out offset; advanced past the literal on success, unchanged on failure.
// ret: receives the literal text, or is empty on failure.
// Returns true when a literal was read.
bool TryGetNumber(char const* source, std::string::size_type& pos, std::string& ret)
{
    ret = "";
    char const first = source[pos];
    bool const startsWithDigit = first >= '0' && first <= '9';
    // source[pos + 1] is safe to read here: source[pos] is '.', not the terminator
    bool const startsWithDot = '.' == first && source[pos + 1] >= '0' && source[pos + 1] <= '9';
    if (!startsWithDigit && !startsWithDot)return false;

    // In a hexadecimal literal 'e' is a digit, so "0x1e-5" must stay "0x1e" minus 5
    bool const isHex = '0' == first && ('x' == source[pos + 1] || 'X' == source[pos + 1]);

    char c;
    while ((c = source[pos]) != '\0')
    {
        bool const isDigit = c >= '0' && c <= '9';
        bool const isLetter = (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
        if (!isDigit && !isLetter && '.' != c && '_' != c)break;
        ret += c;
        ++pos;

        if (!isHex && ('e' == c || 'E' == c))
        {
            char const sign = source[pos];
            // look one character further only when a sign is really there
            char const afterSign = ('+' == sign || '-' == sign) ? source[pos + 1] : '\0';
            if (afterSign >= '0' && afterSign <= '9')
            {
                ret += sign;
                ++pos;
            }
        }
    }
    return ret.size() != 0;
}
- bool NumberToVariable(char const* source, Variable& var)
- {
- char* endptr;
- if (IsCharIn('.', source) || IsCharIn('e', source) || IsCharIn('E', source))
- {
- var.type = Variable::DOUBLE;
- var.dValue = strtod(source, &endptr);
- }
- else
- {
- var.type = Variable::LONG;
- long prefix = 0;
- long radix = 10;
- if (strlen(source) >= 1 && '0' == source[0])
- {
- if (strlen(source) >= 2 && ('x' == source[1] || 'X' == source[1]))
- {
- radix = 16;
- prefix = 2;
- }
- else
- {
- radix = 8;
- prefix = 1;
- }
- }
- var.lValue = strtol(source + prefix, &endptr, radix);
- }
- if (strlen(endptr) != 0)
- {
- if (Variable::DOUBLE == var.type && (0 == stricmp(endptr, "f") || 0 == stricmp(endptr, "l"))
- || Variable::LONG == var.type && (0 == stricmp(endptr, "u") || 0 == stricmp(endptr, "l") || 0 == stricmp(endptr, "i64")))
- {
- return true;
- }
- string str;
- str = "数值常量格式错误 ";
- str += endptr;
- CException::Throw(__FILE__, __LINE__, source, endptr - source, str.c_str());
- return false;
- }
- return true;
- }
我非常痛恨无类型变量,比如JavaScript,所以我在这个脚本里面使用强类型。
变量类型做了简化,分为long、double和string。通过一个类存储所有的变量,代码如下:
// A script value: a long, a double or a string, selected by `type`.
// All three value members always exist; only the one matching `type` is meaningful.
struct Variable
{
    enum types { NULLVARIABLE = 0, LONG, DOUBLE, STRING };
    types type;// which value member below is in use
    bool isconst;// constant flag; nothing inside this struct enforces it
    long lValue;// value when type is LONG
    double dValue;// value when type is DOUBLE
    std::string strValue;// value when type is STRING

    Variable() :type(NULLVARIABLE), isconst(false), lValue(0), dValue(0.) {}
    bool isNull()const { return type == NULLVARIABLE; }
    bool isNumber()const { return type == LONG || type == DOUBLE; }
    bool isString()const { return type == STRING; }
    // Resets to an untyped, non-constant variable with zero/empty values
    void clear()
    {
        type = NULLVARIABLE;
        isconst = false;
        lValue = 0;
        dValue = 0;
        strValue = "";
    }
    // Zeroes the three values; type and isconst are left as they are
    void initvalue()
    {
        lValue = 0;
        dValue = 0;
        strValue = "";
    }

    // The four assignments below share one rule: an untyped variable adopts the
    // type of the assigned value, a typed one keeps its type and converts the value.
    Variable& operator = (long v)
    {
        if (NULLVARIABLE == type)type = LONG;
        switch (type)
        {
        case LONG:lValue = v; break;
        case DOUBLE:dValue = v; break;
        case STRING:strValue = LongToString(v); break;
        default:break;
        }
        return *this;
    }
    Variable& operator = (double v)
    {
        if (NULLVARIABLE == type)type = DOUBLE;
        switch (type)
        {
        case LONG:lValue = (long)v; break;
        case DOUBLE:dValue = v; break;
        case STRING:strValue = DoubleToString(v); break;
        default:break;
        }
        return *this;
    }
    Variable& operator = (std::string const& v)
    {
        if (NULLVARIABLE == type)type = STRING;
        switch (type)
        {
        case LONG:lValue = atol(v.c_str()); break;
        case DOUBLE:dValue = atof(v.c_str()); break;
        case STRING:strValue = v; break;
        default:break;
        }
        return *this;
    }
    // Copies the value only: isconst is not taken over from v
    Variable& operator = (Variable const& v)
    {
        if (NULLVARIABLE == type)type = v.type;
        switch (type)
        {
        case LONG:lValue = v.GetLong(); break;
        case DOUBLE:dValue = v.GetDouble(); break;
        case STRING:strValue = v.GetString(); break;
        default:break;
        }
        return *this;
    }
    // Unary minus; a value that is not a number is returned unchanged
    Variable operator-()const
    {
        Variable tmp = *this;
        switch (type)
        {
        case LONG:tmp.lValue = -lValue; break;
        case DOUBLE:tmp.dValue = -dValue; break;
        default:break;
        }
        return tmp;
    }
    // Result type of combining a and b, or NULLVARIABLE when they cannot be combined.
    // eva=true promotes for assignment: the result follows the left operand.
    static types typeUpgrade(types a, types b, bool eva = false)
    {
        if (NULLVARIABLE == a || NULLVARIABLE == b)return NULLVARIABLE;
        if (LONG == a && LONG == b)return LONG;
        if (STRING == a && STRING == b)return STRING;
        if (DOUBLE == a && DOUBLE == b)return DOUBLE;
        if (!eva)
        {
            if (DOUBLE == a && LONG == b)return DOUBLE;
            if (LONG == a && DOUBLE == b)return DOUBLE;
        }
        else
        {
            if (DOUBLE == a && LONG == b)return DOUBLE;
            if (LONG == a && DOUBLE == b)return LONG;
        }
        return NULLVARIABLE;
    }
    // Value as a long; a string is parsed with atol, an untyped variable gives 0
    long GetLong()const
    {
        switch (type)
        {
        case LONG: return lValue;
        case DOUBLE: return (long)dValue;
        case STRING: return atol(strValue.c_str());
        default:return 0;
        }
    }
    // Value as a double; a string is parsed with atof, an untyped variable gives 0
    double GetDouble()const
    {
        switch (type)
        {
        case LONG: return lValue;
        case DOUBLE: return dValue;
        case STRING: return atof(strValue.c_str());
        default:return 0.;
        }
    }
    // True for a non-zero number; strings and untyped variables are always false
    bool GetBool()const
    {
        switch (type)
        {
        case LONG: return 0 != lValue;
        case DOUBLE: return 0 != dValue;
        default:return false;
        }
    }
    // Value as text; an untyped variable gives the empty string
    std::string GetString()const
    {
        switch (type)
        {
        case LONG: return LongToString(lValue);
        case DOUBLE: return DoubleToString(dValue);
        case STRING: return strValue;
        default:return "";
        }
    }

    // Arithmetic: the result starts as a copy of the left operand and gets the
    // type chosen by typeUpgrade(); NULLVARIABLE marks operands that do not combine.
    Variable operator+(Variable const& b)const
    {
        Variable tmp = *this;
        tmp.type = typeUpgrade(type, b.type);
        switch (tmp.type)
        {
        case LONG:tmp.lValue = GetLong() + b.GetLong(); break;
        case DOUBLE:tmp.dValue = GetDouble() + b.GetDouble(); break;
        case STRING:tmp.strValue = GetString() + b.GetString(); break;
        default:break;
        }
        return tmp;
    }
    Variable operator-(Variable const& b)const
    {
        Variable tmp = *this;
        tmp.type = typeUpgrade(type, b.type);
        switch (tmp.type)
        {
        case LONG:tmp.lValue = GetLong() - b.GetLong(); break;
        case DOUBLE:tmp.dValue = GetDouble() - b.GetDouble(); break;
        default:break;
        }
        return tmp;
    }
    Variable operator*(Variable const& b)const
    {
        Variable tmp = *this;
        tmp.type = typeUpgrade(type, b.type);
        switch (tmp.type)
        {
        case LONG:tmp.lValue = GetLong() * b.GetLong(); break;
        case DOUBLE:tmp.dValue = GetDouble() * b.GetDouble(); break;
        default:break;
        }
        return tmp;
    }
    // Throws the string literal "div zero" (a char const*) when the divisor is zero
    Variable operator/(Variable const& b)const
    {
        Variable tmp = *this;
        tmp.type = typeUpgrade(type, b.type);
        switch (tmp.type)
        {
        case LONG:
            if (0 == b.GetLong())throw "div zero";
            tmp.lValue = GetLong() / b.GetLong();
            break;
        case DOUBLE:
            if (0 == b.GetDouble())throw "div zero";
            tmp.dValue = GetDouble() / b.GetDouble();
            break;
        default:break;
        }
        return tmp;
    }
    // Defined for LONG operands only; throws the string literal "mod zero" on a zero divisor
    Variable operator%(Variable const& b)const
    {
        Variable tmp = *this;
        tmp.type = typeUpgrade(type, b.type);
        switch (tmp.type)
        {
        case LONG:
            if (0 == b.GetLong())throw "mod zero";
            tmp.lValue = GetLong() % b.GetLong();
            break;
        default:break;
        }
        return tmp;
    }

    // Comparisons and logical operators: the result is always a LONG holding 1 or 0.
    // Operands whose types do not combine give 0 rather than the left operand's old lValue.
    Variable operator>(Variable const& b)const
    {
        switch (typeUpgrade(type, b.type))
        {
        case LONG:return BoolResult(GetLong() > b.GetLong());
        case DOUBLE:return BoolResult(GetDouble() > b.GetDouble());
        case STRING:return BoolResult(GetString() > b.GetString());
        default:return BoolResult(false);
        }
    }
    Variable operator<(Variable const& b)const
    {
        switch (typeUpgrade(type, b.type))
        {
        case LONG:return BoolResult(GetLong() < b.GetLong());
        case DOUBLE:return BoolResult(GetDouble() < b.GetDouble());
        case STRING:return BoolResult(GetString() < b.GetString());
        default:return BoolResult(false);
        }
    }
    Variable operator>=(Variable const& b)const
    {
        switch (typeUpgrade(type, b.type))
        {
        case LONG:return BoolResult(GetLong() >= b.GetLong());
        case DOUBLE:return BoolResult(GetDouble() >= b.GetDouble());
        case STRING:return BoolResult(GetString() >= b.GetString());
        default:return BoolResult(false);
        }
    }
    Variable operator<=(Variable const& b)const
    {
        switch (typeUpgrade(type, b.type))
        {
        case LONG:return BoolResult(GetLong() <= b.GetLong());
        case DOUBLE:return BoolResult(GetDouble() <= b.GetDouble());
        case STRING:return BoolResult(GetString() <= b.GetString());
        default:return BoolResult(false);
        }
    }
    Variable operator==(Variable const& b)const
    {
        switch (typeUpgrade(type, b.type))
        {
        case LONG:return BoolResult(GetLong() == b.GetLong());
        case DOUBLE:return BoolResult(GetDouble() == b.GetDouble());
        case STRING:return BoolResult(GetString() == b.GetString());
        default:return BoolResult(false);
        }
    }
    Variable operator!=(Variable const& b)const
    {
        switch (typeUpgrade(type, b.type))
        {
        case LONG:return BoolResult(GetLong() != b.GetLong());
        case DOUBLE:return BoolResult(GetDouble() != b.GetDouble());
        case STRING:return BoolResult(GetString() != b.GetString());
        default:return BoolResult(false);
        }
    }
    // As overloaded operators these evaluate both operands: there is no short-circuit
    Variable operator&&(Variable const& b)const
    {
        switch (typeUpgrade(type, b.type))
        {
        case LONG:return BoolResult(GetLong() && b.GetLong());
        case DOUBLE:return BoolResult(GetDouble() && b.GetDouble());
        default:return BoolResult(false);
        }
    }
    Variable operator||(Variable const& b)const
    {
        switch (typeUpgrade(type, b.type))
        {
        case LONG:return BoolResult(GetLong() || b.GetLong());
        case DOUBLE:return BoolResult(GetDouble() || b.GetDouble());
        default:return BoolResult(false);
        }
    }
    // Name of a type value. For a value outside the enum the text is built in a
    // static buffer that every such call shares and overwrites.
    static char const* TypeStr(types type)
    {
        switch (type)
        {
        case NULLVARIABLE: return "NULLVARIABLE";
        case LONG: return "LONG";
        case DOUBLE: return "DOUBLE";
        case STRING: return "STRING";
        }
        static char buf[256];
        snprintf(buf, sizeof(buf), "错误的类型 %d", (int)type);
        return buf;
    }
    // Human-readable dump: 4 spaces per `level`, one more space, the constant/variable
    // marker, the type name and the value ("NULL" for an untyped variable).
    std::string ToString(long level = 0)const
    {
        std::string value;
        switch (type)
        {
        case LONG:value = LongToString(lValue); break;
        case DOUBLE:value = DoubleToString(dValue); break;
        // Built as a std::string: a string value may be longer than any fixed buffer.
        // c_str() keeps the old cut-off at an embedded NUL.
        case STRING:value = strValue.c_str(); break;
        default:value = "NULL"; break;
        }
        std::string ret;
        ret.assign(level * 4, ' ');
        ret += " ";
        ret += (isconst ? "常量" : "变量");
        ret += "类型 ";
        ret += TypeStr(type);
        ret += " : ";
        ret += value;
        return ret;
    }

private:
    // Decimal text of a long
    static std::string LongToString(long v)
    {
        char buf[64];
        snprintf(buf, sizeof(buf), "%ld", v);
        return buf;
    }
    // Text of a double, produced by gcvt() with up to 200 significant digits requested
    static std::string DoubleToString(double v)
    {
        char buf[256];
        gcvt(v, 200, buf);
        return buf;
    }
    // Copy of *this turned into a LONG holding 1 or 0; the copy keeps the left
    // operand's isconst flag, as the comparison results always did
    Variable BoolResult(bool value)const
    {
        Variable tmp = *this;
        tmp.type = LONG;
        tmp.lValue = value;
        return tmp;
    }
};
没有使用union,直接用类型和三个变量来存储,空间当然是有浪费的,但是没人知道啊。
重载了各种类型的相互操作,都很简单,只是繁琐。
成员变量:
类型 | 变量名 | 功能 |
enum types | type | 实际存储的类型 |
bool | isconst | 是否是常量,常量不允许修改 |
long | lValue | type为LONG时使用 |
double | dValue | type为DOUBLE时使用 |
string | strValue | type为STRING时使用 |
这个类没有太多需要解释的,只是作为一个基础数据结构存在。
(这里是本文结束,但不是整个系列的结束)
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。