赞
踩
/*
** Reader callback used by the ZIO stream. luaZ_fill calls it as
** reader(L, data, &size) and treats the returned pointer as a block of
** input whose length the callback stored in *sz. A NULL result or a
** zero length is taken as end of input.
*/
typedef const char * (*lua_Reader) (lua_State *L, void *ud, size_t *sz);
/* Buffered input stream: a window over the last block returned by the reader. */
struct Zio {
size_t n; /* bytes still unread in the buffer */
const char *p; /* current read position in the buffer */
lua_Reader reader; /* callback that supplies more input */
void *data; /* argument passed to the reader callback */
lua_State *L; /* Lua state (for reader) */
};
/* Set up stream z with the given state, reader and reader argument; the buffer starts empty. */
LUAI_FUNC void luaZ_init (lua_State *L, ZIO *z, lua_Reader reader,void *data);
/* Ask the reader for a new block; returns its first byte, or EOZ when the reader has no more data. */
LUAI_FUNC int luaZ_fill (ZIO *z);
/*
** Read one byte from stream z. While buffered bytes remain (n > 0) the
** byte at p is returned and p advances; otherwise luaZ_fill is called.
** Note that n is decremented on both paths and that the argument z is
** evaluated more than once.
*/
#define zgetc(z) (((z)->n--)>0 ? cast_uchar(*(z)->p++) : luaZ_fill(z))
/*
** Initialise the stream 'z': remember the owning state, the reader
** callback and its argument, and start with an empty buffer.
*/
void luaZ_init (lua_State *L, ZIO *z, lua_Reader reader, void *data) {
  /* empty buffer: with n == 0 the first zgetc goes straight to luaZ_fill */
  z->p = NULL;
  z->n = 0;
  /* where the input comes from */
  z->data = data;
  z->reader = reader;
  z->L = L;
}
#define EOZ (-1) int luaZ_fill (ZIO *z) { size_t size; lua_State *L = z->L; const char *buff; lua_unlock(L); buff = z->reader(L, z->data, &size); //调用回调,buff是读取到的数据,size返回的是buff的长度 lua_lock(L); if (buff == NULL || size == 0) //读到结尾 return EOZ; //返回-1 z->n = size - 1; //长度 z->p = buff; //缓冲区 return cast_uchar(*(z->p++)); //转成unsigned char } //返回一个字符,如果缓冲区数据为空则读取数据 #define zgetc(z) (((z)->n--)>0 ? cast_uchar(*(z)->p++) : luaZ_fill(z))
statlist -> { stat [';'] }
fieldsel -> ['.' | ':'] NAME
index -> '[' expr ']'
recfield -> (NAME | '['exp']') = exp
listfield -> exp
field -> listfield | recfield
constructor -> '{' [ field { sep field } [sep] ] '}'
sep -> ',' | ';'
parlist -> [ {NAME ','} (NAME | '...') ]
body -> '(' parlist ')' block END
explist -> expr { ',' expr }
funcargs -> '(' [ explist ] ')'
funcargs -> constructor
funcargs -> STRING
primaryexp -> NAME | '(' expr ')'
suffixedexp -> primaryexp { '.' NAME | '[' exp ']' | ':' NAME funcargs | funcargs }
simpleexp -> FLT | INT | STRING | NIL | TRUE | FALSE | ... | constructor | FUNCTION body | suffixedexp
block -> statlist
restassign -> ',' suffixedexp restassign
restassign -> '=' explist
cond -> exp
label -> '::' NAME '::'
whilestat -> WHILE cond DO block END
repeatstat -> REPEAT block UNTIL cond
forbody -> DO block
fornum -> NAME = exp,exp[,exp] forbody
forlist -> NAME {,NAME} IN explist forbody
forstat -> FOR (fornum | forlist) END
test_then_block -> [IF | ELSEIF] cond THEN block
ifstat -> IF cond THEN block {ELSEIF cond THEN block} [ELSE block] END
ATTRIB -> ['<' Name '>']
stat -> LOCAL NAME ATTRIB { ',' NAME ATTRIB } ['=' explist]
funcname -> NAME {fieldsel} [':' NAME]
funcstat -> FUNCTION funcname body
stat -> func | assignment
stat -> RETURN [explist] [';']
这些语法规则来源于lparser.c文件中的注释,一般每条规则都会有一个独立的函数来处理
/*
** Token codes. Reserved words come first, starting at FIRST_RESERVED,
** followed by the other multi-character terminals. The order must match
** the spellings listed in luaX_tokens below.
*/
enum RESERVED {
  /* terminal symbols denoted by reserved words */
  TK_AND = FIRST_RESERVED,
  TK_BREAK,
  TK_DO,
  TK_ELSE,
  TK_ELSEIF,
  TK_END,
  TK_FALSE,
  TK_FOR,
  TK_FUNCTION,
  TK_GOTO,
  TK_IF,
  TK_IN,
  TK_LOCAL,
  TK_NIL,
  TK_NOT,
  TK_OR,
  TK_REPEAT,
  TK_RETURN,
  TK_THEN,
  TK_TRUE,
  TK_UNTIL,
  TK_WHILE,
  /* other terminal symbols */
  TK_IDIV,
  TK_CONCAT,
  TK_DOTS,
  TK_EQ,
  TK_GE,
  TK_LE,
  TK_NE,
  TK_SHL,
  TK_SHR,
  TK_DBCOLON,
  TK_EOS,
  TK_FLT,
  TK_INT,
  TK_NAME,
  TK_STRING
};

/* Spelling of each token, indexed by (token code - FIRST_RESERVED). */
static const char *const luaX_tokens [] = {
  /* reserved words */
  "and", "break", "do", "else", "elseif",
  "end", "false", "for", "function", "goto",
  "if", "in", "local", "nil", "not",
  "or", "repeat", "return", "then", "true",
  "until", "while",
  /* operators and punctuation */
  "//", "..", "...", "==", ">=",
  "<=", "~=", "<<", ">>", "::",
  /* end of stream and value-carrying tokens */
  "<eof>", "<number>", "<integer>", "<name>", "<string>"
};
在这些token定义中,while及其之前的token(即全部保留字)在虚拟机创建时已经加入字符串池
/* Number of reserved words: the tokens from TK_AND (== FIRST_RESERVED) through TK_WHILE, inclusive. */
#define NUM_RESERVED (cast_int(TK_WHILE-FIRST_RESERVED + 1))
/*
** Lexer set-up: create the LUA_ENV name and one string per reserved
** word, fix them all so they are never collected, and tag each
** reserved word with its 1-based position in luaX_tokens. llex later
** reads that tag (through 'isreserved') to tell keywords from names.
*/
void luaX_init (lua_State *L) {
  TString *envname = luaS_newliteral(L, LUA_ENV);  /* create env name */
  int idx = 0;
  luaC_fix(L, obj2gco(envname));  /* never collect this name */
  while (idx < NUM_RESERVED) {
    TString *word = luaS_new(L, luaX_tokens[idx]);
    luaC_fix(L, obj2gco(word));  /* reserved words are never collected */
    idx++;
    word->extra = cast_byte(idx);  /* non-zero 'extra' marks a reserved word */
  }
}
这段代码的一个细节就是ts->extra的赋值,这个值在token分割函数(llex)中被用到,用来区分关键字
#define next(ls) (ls->current = zgetc(ls->z)) //使用zget读取字符 #define isreserved(s) ((s)->tt == LUA_VSHRSTR && (s)->extra > 0) //关键字区分 static int llex (LexState *ls, SemInfo *seminfo) { luaZ_resetbuffer(ls->buff); for (;;) { switch (ls->current) { case '\n': case '\r': { //换行 inclinenumber(ls); break; } case ' ': case '\f': case '\t': case '\v': { //空格 next(ls); break; } case '-': { //可能是负数,可能是注释 next(ls); if (ls->current != '-') return '-'; /* else is a comment */ next(ls); if (ls->current == '[') { //多行注释 size_t sep = skip_sep(ls); luaZ_resetbuffer(ls->buff); /* 'skip_sep' may dirty the buffer */ if (sep >= 2) { read_long_string(ls, NULL, sep); //读到没注释的地方 luaZ_resetbuffer(ls->buff); /* previous call may dirty the buff. */ break; } } /* else short comment */ while (!currIsNewline(ls) && ls->current != EOZ) //单行注释,跳出这一行 next(ls); /* skip until end of line (or end of file) */ break; } case '[': { /* long string or simply '[' */ size_t sep = skip_sep(ls); if (sep >= 2) { read_long_string(ls, seminfo, sep); //读到']'之后,把字符串保存到seminfo->ts return TK_STRING; } else if (sep == 0) /* '[=...' missing second bracket? 
*/ lexerror(ls, "invalid long string delimiter", TK_STRING); return '['; } case '=': { next(ls); if (check_next1(ls, '=')) return TK_EQ; /* '==' */ else return '='; } case '<': { next(ls); if (check_next1(ls, '=')) return TK_LE; /* '<=' */ else if (check_next1(ls, '<')) return TK_SHL; /* '<<' */ else return '<'; } case '>': { next(ls); if (check_next1(ls, '=')) return TK_GE; /* '>=' */ else if (check_next1(ls, '>')) return TK_SHR; /* '>>' */ else return '>'; } case '/': { next(ls); if (check_next1(ls, '/')) return TK_IDIV; /* '//' */ else return '/'; } case '~': { next(ls); if (check_next1(ls, '=')) return TK_NE; /* '~=' */ else return '~'; } case ':': { next(ls); if (check_next1(ls, ':')) return TK_DBCOLON; /* '::' */ else return ':'; } case '"': case '\'': { //字符串 read_string(ls, ls->current, seminfo); //读取字符串,保存在seminfo->ts return TK_STRING; } case '.': { /* '.', '..', '...', or number */ save_and_next(ls); if (check_next1(ls, '.')) { if (check_next1(ls, '.')) return TK_DOTS; /* '...' */ else return TK_CONCAT; /* '..' */ } else if (!lisdigit(ls->current)) return '.'; else return read_numeral(ls, seminfo); } case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': { return read_numeral(ls, seminfo); } case EOZ: { //结束 return TK_EOS; } default: { if (lislalpha(ls->current)) { //标识符判断 TString *ts; do { save_and_next(ls); } while (lislalnum(ls->current)); ts = luaX_newstring(ls, luaZ_buffer(ls->buff), luaZ_bufflen(ls->buff)); seminfo->ts = ts; if (isreserved(ts)) //关键字 return ts->extra - 1 + FIRST_RESERVED; //ts->extra在luaX_init初始化 else { return TK_NAME; //标识符 } } else { /* single-char tokens ('+', '*', '%', '{', '}', ...) */ int c = ls->current; next(ls); return c; } } } } }
为了方便调试,简单地在源码里复制了一份代码
# plua_tokens LS
# Print the token dump for a lexer state. LS is passed to print_tokens()
# in the debugged program and the string it returns is printed with %s.
# With any other number of arguments only the usage message is shown;
# print_tokens is not called, since $arg0 may not exist.
define plua_tokens
  if $argc != 1
    p "参数数目错误,{plua_tokens LexState *}"
  else
    printf "%s",print_tokens($arg0)
  end
end
在gdb_print.c实现print_tokens然后封装一个调试脚本
-- Sample globals: two integers, a string and a table with three named fields.
width = 10
height = 30
str = "hello lua"
tab = {
  a = 12,
  b = 14,
  c = 30,
}
-- Return a when a > b, otherwise b (so b is returned when the two are equal).
function number_max(a, b)
  if not (a > b) then
    return b
  end
  return a
end
gdb执行
b statlist
plua_tokens ls
ls类型是 LexState
输出结果
具体的做法可以看 https://github.com/huoyang11/read_lua/blob/main/src/gdb_print.c
plua_lscode ls
ls类型是 LexState
在lua语法解析时可以通过这个脚本看解析的指令
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。