赞
踩
编译原理上机——函数绘图语言(一)
编译原理上机——函数绘图语言(二):词法分析器
编译原理上机——函数绘图语言(四):语义分析器
编译原理上机——函数绘图语言(五):编译器与解释器
完整代码
Gitee开源代码
为了让词法分析器更好的为语法分析器服务,我更改了词法分析器的部分代码。所以在这里重新进行说明。
这是一个表驱动型词法分析器,符号表将会被保存在文件TOKEN.npy
中.
# -*- coding: utf-8 -*- """ Created on Mon Nov 23 20:05:45 2020 @author: FishPotatoChen Copyright (c) 2020 FishPotatoChen All rights reserved. """ import numpy as np import math TOKEN = { # 常量 'PI': {'TYPE': 'CONST_ID', 'VALUE': math.pi, 'FUNCTION': None}, 'E': {'TYPE': 'CONST_ID', 'VALUE': math.e, 'FUNCTION': None}, # 变量 'T': {'TYPE': 'SYMBOL', 'VALUE': None, 'FUNCTION': None}, # 函数 'SIN': {'TYPE': 'FUNC', 'VALUE': None, 'FUNCTION': 'math.sin'}, 'COS': {'TYPE': 'FUNC', 'VALUE': None, 'FUNCTION': 'math.cos'}, 'TAN': {'TYPE': 'FUNC', 'VALUE': None, 'FUNCTION': 'math.tan'}, 'LN': {'TYPE': 'FUNC', 'VALUE': None, 'FUNCTION': 'math.log'}, 'EXP': {'TYPE': 'FUNC', 'VALUE': None, 'FUNCTION': 'math.exp'}, 'SQRT': {'TYPE': 'FUNC', 'VALUE': None, 'FUNCTION': 'math.sqrt'}, # 保留字 'ORIGIN': {'TYPE': 'KEYWORD', 'VALUE': None, 'FUNCTION': None}, 'SCALE': {'TYPE': 'KEYWORD', 'VALUE': None, 'FUNCTION': None}, 'ROT': {'TYPE': 'KEYWORD', 'VALUE': None, 'FUNCTION': None}, 'IS': {'TYPE': 'KEYWORD', 'VALUE': None, 'FUNCTION': None}, 'FOR': {'TYPE': 'KEYWORD', 'VALUE': None, 'FUNCTION': None}, 'FROM': {'TYPE': 'KEYWORD', 'VALUE': None, 'FUNCTION': None}, 'TO': {'TYPE': 'KEYWORD', 'VALUE': None, 'FUNCTION': None}, 'STEP': {'TYPE': 'KEYWORD', 'VALUE': None, 'FUNCTION': None}, 'DRAW': {'TYPE': 'KEYWORD', 'VALUE': None, 'FUNCTION': None}, # 运算符 '+': {'TYPE': 'OP', 'VALUE': None, 'FUNCTION': None}, '-': {'TYPE': 'OP', 'VALUE': None, 'FUNCTION': None}, '*': {'TYPE': 'OP', 'VALUE': None, 'FUNCTION': None}, '/': {'TYPE': 'OP', 'VALUE': None, 'FUNCTION': None}, '**': {'TYPE': 'OP', 'VALUE': None, 'FUNCTION': None}, # 记号 '(': {'TYPE': 'MARK', 'VALUE': None, 'FUNCTION': None}, ')': {'TYPE': 'MARK', 'VALUE': None, 'FUNCTION': None}, ',': {'TYPE': 'MARK', 'VALUE': None, 'FUNCTION': None}, # 结束符 ';': {'TYPE': 'END', 'VALUE': None, 'FUNCTION': None}, # 空 '': {'TYPE': 'EMPTY', 'VALUE': None, 'FUNCTION': None}, # 数字 '0': {'TYPE': 'NUMBER', 'VALUE': 0.0, 'FUNCTION': None}, '1': {'TYPE': 'NUMBER', 'VALUE': 1.0, 'FUNCTION': None}, '2': {'TYPE': 'NUMBER', 'VALUE': 2.0, 'FUNCTION': None}, '3': {'TYPE': 'NUMBER', 'VALUE': 3.0, 'FUNCTION': None}, '4': {'TYPE': 'NUMBER', 'VALUE': 4.0, 'FUNCTION': None}, '5': {'TYPE': 'NUMBER', 'VALUE': 5.0, 'FUNCTION': None}, '6': {'TYPE': 'NUMBER', 'VALUE': 6.0, 'FUNCTION': None}, '7': {'TYPE': 'NUMBER', 'VALUE': 7.0, 'FUNCTION': None}, '8': {'TYPE': 'NUMBER', 'VALUE': 8.0, 'FUNCTION': None}, '9': {'TYPE': 'NUMBER', 'VALUE': 9.0, 'FUNCTION': None}, '.': {'TYPE': 'NUMBER', 'VALUE': None, 'FUNCTION': None}, } np.save('TOKEN.npy', TOKEN)
文件名为lexer.py
# -*- coding: utf-8 -*- """ Created on Mon Nov 23 20:05:45 2020 @author: FishPotatoChen Copyright (c) 2020 FishPotatoChen All rights reserved. """ # 词法分析器 import math import re import numpy as np class Lexer: def __init__(self): # 从文件中读取记号表 self.TOKEN = np.load('TOKEN.npy', allow_pickle=True).item() def getToken(self, sentence): self.output_list = [] if sentence: tokens = sentence.split() for token in tokens: try: # No.0 # 首先识别直接可以识别的记号 # 正规式为ORIGIN|SCALE|ROT|IS|FOR|FROM|TO|STEP|DRAW|ε self.output_token(token) # No.0 识别结束 except: # 找不到就进入更高级的DFA中识别 self.argument_lexer(token) self.output_token(';') # 构造更为复杂、高级的、多种类的识别表达式 def argument_lexer(self, argument): # 扫描位置 i = 0 # 字符串长度 length = len(argument) while(i < length): # 临时字符串,即缓冲区 temp = '' if argument[i] in ['P', 'S', 'C', 'L', 'E', 'T', '*']: # No.1 # 识别"*"还是"**"的过程是一个上下文有关文法 if argument[i] == '*': i += 1 if i >= length: self.output_token(argument[i]) break elif argument[i] == '*': self.output_token('**') else: i -= 1 self.output_token(argument[i]) # No.1 识别结束 else: # No.2 # DFA判断全为字母的字符串 # 正规式为PI|E|T|SIN|COS|TAN|LOG|EXP|SQRT temp = re.findall(r"[A-Z]+", argument[i:])[0] # 看该串是否接受 self.output_token(temp) i += len(temp)-1 if i >= length: break # No.2 识别结束 elif argument[i] in ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '.']: # No.3 # 识别数字 if argument[i] == '.': # 识别开头为"."的数字 # 正规式为.[0-9]+ # 如:.52=>0.52 i += 1 temp = re.findall(r"\d+", argument[i:])[0] i += len(temp)-1 temp = '0.' + temp self.output_token(temp, False) else: # 识别一般数字 # 正规式为[0-9]+.?[0-9]* # 如:5.52=>5.52;12=>12 temp = re.findall(r"\d+\.?\d*", argument[i:])[0] i += len(temp)-1 self.output_token(temp, False) if i >= length: break # No.3 识别结束 else: # No.4 # 识别其他字符 # 正规式为+|-|/|(|)|,|ε self.output_token(argument[i]) i += 1 # 输出函数 # 因为解释器可以不用输出至屏幕,但是题目要求输出至屏幕 # 所以特别写了一个函数用于输出,便于接下来的语法分析器调用文法分析器 # 当输出不为屏幕时,将输出嵌入代码会变得不好更改 # 如果写成独立函数,直接更改输出函数即可 def output_token(self, token, NotNumber=True): if NotNumber: self.output_list.append([token, self.TOKEN[token]]) else: tempdic = {token: {'TYPE': 'NUMBER', 'VALUE': float(token), 'FUNCTION': None}} self.output_list.append([token, tempdic[token]])
未做更改
代码见以下链接:
编译原理上机——函数绘图语言(二):词法分析器
文件名为myparser.py
# -*- coding: utf-8 -*- """ Created on Wed Dec 2 19:36:32 2020 @author: FishPotatoChen Copyright (c) 2020 FishPotatoChen All rights reserved. """ # 语法分析器 import ast import astunparse import scanner class Parser: def __init__(self): self.scanner = scanner.Scanner("test.txt") self.dataflow = self.scanner.analyze() self.i = 0 # 数据读取位置 self.length = len(self.dataflow) # 数据流长度 def analyze(self): print('enter in program') self.i = 0 while(self.i < self.length): print('enter in statement') # 匹配保留字 # S->'ORIGIN'T|'SCALE'T|'ROT'T|'FOR'P # T->'IS('E','E')'|'IS'E # E为一个算数表达式 # P为FOR后特殊结构 if self.dataflow[self.i][1]['TYPE'] == 'KEYWORD': self.output(self.dataflow[self.i][0]) print('exit from statement') print('exit from program') def output(self, string): # 输出 # 因为输出语句结构相同 # 所以放在同一个函数中 # 实际上,语法分析到得到语法树便结束了 print('enter in '+string.lower()+'_statement') print('matchtoken '+string.upper()) self.i += 1 if string == 'ORIGIN' or string == 'SCALE': self.ORIGIN_or_SCALE() elif string == 'ROT': self.ROT() elif string == 'FOR': self.FOR() else: raise SyntaxError() print('exit from '+string.lower()+'_statement') def ORIGIN_or_SCALE(self): self.matchstring('IS') templist = self.matchparameter() self.outputTree(templist[0]) self.outputTree(templist[1]) def ROT(self): self.matchstring('IS') self.outputTree(self.matchexpression()) def FOR(self): self.matchstring('T') self.matchstring('FROM') self.outputTree(self.matchexpression()) self.matchstring('TO') self.outputTree(self.matchexpression()) self.matchstring('STEP') self.outputTree(self.matchexpression()) self.matchstring('DRAW') templist = self.matchparameter() self.outputTree(templist[0]) self.outputTree(templist[1]) def matchstring(self, string): # 匹配一个特定的字符串 if self.dataflow[self.i][0] == string: print('matchtoken '+string) self.i += 1 else: raise SyntaxError() # matchparameter与matchexpression的区别 # 前者匹配双表达式 # 或者既可以匹配双表达式又可以匹配单表达式 # 分开原因: # 考虑到ROT后面是单表达式而ORIGIN与SCALE后面都是算表达式 # 并且FOR后面既有单表达式又有双表达式 # 所以特此区分 def matchparameter(self): # 匹配(E,E) # 即匹配参数列表 # 如: # ORIGIN IS (5,5); # 后面的括号中包括括号的部分都是参数列表 temp = self.matchexpression() # 缓冲区 # 转换为列表[E,E] if temp[0] == '(' and temp[-1] == ')': temp = temp[1:-1].split(',') else: raise SyntaxError() return temp def matchexpression(self): # 匹配E或者(E,E) # 即匹配一个算数表达式 # 如 # 5*2-LN(3) # (5*2-3,tan(0.1)) temp = '' # 缓冲区 while(self.dataflow[self.i][0] != ';' and self.i < self.length and self.dataflow[self.i][1]['TYPE'] != 'KEYWORD'): if self.dataflow[self.i][1]['TYPE'] == 'FUNC': temp += self.dataflow[self.i][1]['FUNCTION'] elif self.dataflow[self.i][1]['TYPE'] == 'CONST_ID': temp += str(self.dataflow[self.i][1]['VALUE']) else: temp += self.dataflow[self.i][0] self.i += 1 if self.dataflow[self.i][0] == ';': self.i += 1 # 跳过结尾的分号 return temp def outputTree(self, string): # 输出语法树 print('enter in expression') print(astunparse.dump(ast.parse(string, filename='<unknown>'))) print('exit from expression')
文件名main.py
# -*- coding: utf-8 -*-
"""
Created on Wed Dec 3 18:27:18 2020
@author: FishPotatoChen
Copyright (c) 2020 FishPotatoChen All rights reserved.
"""
from myparser import Parser
if __name__ == '__main__':
Parser().analyze()
文件名为test.txt
origin is (100*sin(0.5),100*cos(0.6));
origin is (0+tan(0.1),0-ln(e));
origin is (10,10);
origin is (0,0);
origin is (10,10);
ROT is E/4;
ROT is pi ** 4;
ROT is pi /4*5;
ROT is sin( 4.5)*cos(3.5);
ROT is pi/4;
enter in program enter in statement enter in origin_statement matchtoken ORIGIN matchtoken IS enter in expression Module(body=[Expr(value=BinOp( left=Num(n=100), op=Mult(), right=Call( func=Attribute( value=Name( id='math', ctx=Load()), attr='sin', ctx=Load()), args=[Num(n=0.5)], keywords=[])))]) exit from expression enter in expression Module(body=[Expr(value=BinOp( left=Num(n=100), op=Mult(), right=Call( func=Attribute( value=Name( id='math', ctx=Load()), attr='cos', ctx=Load()), args=[Num(n=0.6)], keywords=[])))]) exit from expression exit from origin_statement exit from statement enter in statement enter in origin_statement matchtoken ORIGIN matchtoken IS enter in expression Module(body=[Expr(value=BinOp( left=Num(n=0), op=Add(), right=Call( func=Attribute( value=Name( id='math', ctx=Load()), attr='tan', ctx=Load()), args=[Num(n=0.1)], keywords=[])))]) exit from expression enter in expression Module(body=[Expr(value=BinOp( left=Num(n=0), op=Sub(), right=Call( func=Attribute( value=Name( id='math', ctx=Load()), attr='log', ctx=Load()), args=[Num(n=2.718281828459045)], keywords=[])))]) exit from expression exit from origin_statement exit from statement enter in statement enter in origin_statement matchtoken ORIGIN matchtoken IS enter in expression Module(body=[Expr(value=Num(n=10))]) exit from expression enter in expression Module(body=[Expr(value=Num(n=10))]) exit from expression exit from origin_statement exit from statement enter in statement enter in origin_statement matchtoken ORIGIN matchtoken IS enter in expression Module(body=[Expr(value=Num(n=0))]) exit from expression enter in expression Module(body=[Expr(value=Num(n=0))]) exit from expression exit from origin_statement exit from statement enter in statement enter in origin_statement matchtoken ORIGIN matchtoken IS enter in expression Module(body=[Expr(value=Num(n=10))]) exit from expression enter in expression Module(body=[Expr(value=Num(n=10))]) exit from expression exit from origin_statement exit from statement enter in statement enter in rot_statement matchtoken ROT matchtoken IS enter in expression Module(body=[Expr(value=BinOp( left=Num(n=2.718281828459045), op=Div(), right=Num(n=4)))]) exit from expression exit from rot_statement exit from statement enter in statement enter in rot_statement matchtoken ROT matchtoken IS enter in expression Module(body=[Expr(value=BinOp( left=Num(n=3.141592653589793), op=Pow(), right=Num(n=4)))]) exit from expression exit from rot_statement exit from statement enter in statement enter in rot_statement matchtoken ROT matchtoken IS enter in expression Module(body=[Expr(value=BinOp( left=BinOp( left=Num(n=3.141592653589793), op=Div(), right=Num(n=4)), op=Mult(), right=Num(n=5)))]) exit from expression exit from rot_statement exit from statement enter in statement enter in rot_statement matchtoken ROT matchtoken IS enter in expression Module(body=[Expr(value=BinOp( left=Call( func=Attribute( value=Name( id='math', ctx=Load()), attr='sin', ctx=Load()), args=[Num(n=4.5)], keywords=[]), op=Mult(), right=Call( func=Attribute( value=Name( id='math', ctx=Load()), attr='cos', ctx=Load()), args=[Num(n=3.5)], keywords=[])))]) exit from expression exit from rot_statement exit from statement enter in statement enter in rot_statement matchtoken ROT matchtoken IS enter in expression Module(body=[Expr(value=BinOp( left=Num(n=3.141592653589793), op=Div(), right=Num(n=4)))]) exit from expression exit from rot_statement exit from statement exit from program
使用Python库ast
,这个是Python构建语法树专用的包,使用astunparse
库可以使输出变得好看一些,更有层次感。
其实和书上的上下文无关文法一样,只是把 → \rightarrow →改成了 = = =,其他都一样,不信自己可以试着看看,很简单。
举个例子:
mod = Module(stmt* body)
| Interactive(stmt* body)
| Expression(expr body)
转换为书上的写法就是
m o d → M o d u l e ( s t m t ∗ b o d y ) ∣ I n t e r a c t i v e ( s t m t ∗ b o d y ) ∣ E x p r e s s i o n ( e x p r b o d y ) mod\rightarrow Module(stmt* body)|Interactive(stmt* body)|Expression(expr body) mod→Module(stmt∗body)∣Interactive(stmt∗body)∣Expression(exprbody)
之后stmt* body又可以推导出一系列的文法,可以理解为C中的指针,stmt类型指向了名称为body的对象,只是书上用的是单个大写字母表示的非终结符,这里只是直接使用的是单词表示而已
-- ASDL's 7 builtin types are: -- identifier, int, string, bytes, object, singleton, constant -- -- singleton: None, True or False -- constant can be None, whereas None means "no value" for object. module Python { mod = Module(stmt* body) | Interactive(stmt* body) | Expression(expr body) -- not really an actual node but useful in Jython's typesystem. | Suite(stmt* body) stmt = FunctionDef(identifier name, arguments args, stmt* body, expr* decorator_list, expr? returns) | AsyncFunctionDef(identifier name, arguments args, stmt* body, expr* decorator_list, expr? returns) | ClassDef(identifier name, expr* bases, keyword* keywords, stmt* body, expr* decorator_list) | Return(expr? value) | Delete(expr* targets) | Assign(expr* targets, expr value) | AugAssign(expr target, operator op, expr value) -- 'simple' indicates that we annotate simple name without parens | AnnAssign(expr target, expr annotation, expr? value, int simple) -- use 'orelse' because else is a keyword in target languages | For(expr target, expr iter, stmt* body, stmt* orelse) | AsyncFor(expr target, expr iter, stmt* body, stmt* orelse) | While(expr test, stmt* body, stmt* orelse) | If(expr test, stmt* body, stmt* orelse) | With(withitem* items, stmt* body) | AsyncWith(withitem* items, stmt* body) | Raise(expr? exc, expr? cause) | Try(stmt* body, excepthandler* handlers, stmt* orelse, stmt* finalbody) | Assert(expr test, expr? msg) | Import(alias* names) | ImportFrom(identifier? module, alias* names, int? level) | Global(identifier* names) | Nonlocal(identifier* names) | Expr(expr value) | Pass | Break | Continue -- XXX Jython will be different -- col_offset is the byte offset in the utf8 string the parser uses attributes (int lineno, int col_offset) -- BoolOp() can use left & right? expr = BoolOp(boolop op, expr* values) | BinOp(expr left, operator op, expr right) | UnaryOp(unaryop op, expr operand) | Lambda(arguments args, expr body) | IfExp(expr test, expr body, expr orelse) | Dict(expr* keys, expr* values) | Set(expr* elts) | ListComp(expr elt, comprehension* generators) | SetComp(expr elt, comprehension* generators) | DictComp(expr key, expr value, comprehension* generators) | GeneratorExp(expr elt, comprehension* generators) -- the grammar constrains where yield expressions can occur | Await(expr value) | Yield(expr? value) | YieldFrom(expr value) -- need sequences for compare to distinguish between -- x < 4 < 3 and (x < 4) < 3 | Compare(expr left, cmpop* ops, expr* comparators) | Call(expr func, expr* args, keyword* keywords) | Num(object n) -- a number as a PyObject. | Str(string s) -- need to specify raw, unicode, etc? | FormattedValue(expr value, int? conversion, expr? format_spec) | JoinedStr(expr* values) | Bytes(bytes s) | NameConstant(singleton value) | Ellipsis | Constant(constant value) -- the following expression can appear in assignment context | Attribute(expr value, identifier attr, expr_context ctx) | Subscript(expr value, slice slice, expr_context ctx) | Starred(expr value, expr_context ctx) | Name(identifier id, expr_context ctx) | List(expr* elts, expr_context ctx) | Tuple(expr* elts, expr_context ctx) -- col_offset is the byte offset in the utf8 string the parser uses attributes (int lineno, int col_offset) expr_context = Load | Store | Del | AugLoad | AugStore | Param slice = Slice(expr? lower, expr? upper, expr? step) | ExtSlice(slice* dims) | Index(expr value) boolop = And | Or operator = Add | Sub | Mult | MatMult | Div | Mod | Pow | LShift | RShift | BitOr | BitXor | BitAnd | FloorDiv unaryop = Invert | Not | UAdd | USub cmpop = Eq | NotEq | Lt | LtE | Gt | GtE | Is | IsNot | In | NotIn comprehension = (expr target, expr iter, expr* ifs, int is_async) excepthandler = ExceptHandler(expr? type, identifier? name, stmt* body) attributes (int lineno, int col_offset) arguments = (arg* args, arg? vararg, arg* kwonlyargs, expr* kw_defaults, arg? kwarg, expr* defaults) arg = (identifier arg, expr? annotation) attributes (int lineno, int col_offset) -- keyword arguments supplied to call (NULL identifier for **kwargs) keyword = (identifier? arg, expr value) -- import name with optional 'as' alias. alias = (identifier name, identifier? asname) withitem = (expr context_expr, expr? optional_vars) }
使用100*sin(0.5)
举例,该表达式构造的树结构如下所示
Module(body=[Expr(value=BinOp(
left=Num(n=100),
op=Mult(),
right=Call(
func=Attribute(
value=Name(
id='math',
ctx=Load()),
attr='sin',
ctx=Load()),
args=[Num(n=0.5)],
keywords=[])))])
这个怎么变成树呢?
value=BinOp
,说明是二叉树,并且根节点为运算符;left=
,这是在说左孩子是什么;Num(n=100)
,左孩子是数字,值为100;op=Mult()
,通过第一步,我们已经根节点为运算符,这里指运算符为乘号;right=
,这是在说右孩子是什么;Call
,这是一个函数;id='math'
,名字是math
,也就是Python的math
库;attr='sin'
,属性是sin
,调用的是sin
函数;args=[Num(n=0.5)]
,参数为一个数字,值为0.5;math
库中的sin
函数,函数参数为一个值为0.5的数。Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。