--[[ LPEG Arithmetic Expression Parser with Semantic Actions H. Conrad Cunningham, Professor Computer and Information Science University of Mississippi Developed for CSci 658, Software Language Engineering, Fall 2013 1234567890123456789012345678901234567890123456789012345678901234567890 2013-10-08: Completed prototype 2018-02-17: Changed unpack(t) to table.unpack(t) for Lua 5.2/5.3 This program uses the Lua Parsing Expression Grammar (LPEG) library to parse the following grammar for arithmetic expression trees, to capture significant strings during parsing, adn to generate an arithmetic expression tree similar to the list-style node used in the Arithmetic Expression Tree evaluation and differentiation example. Instead of just sine and cosine (as in the evaluation and differentiation example), this program parses general function applications with at least one argument. Exp ::= Term { TermOp Term }^* Term ::= Factor { FactorOp Factor }^* Factor ::= Func | Variable | Number | - Exp | ( Exp ) Func ::= Variable ( [ Exp { , Exp }^* ] ) TermOp ::= + | - FactorOp ::= * | / Variable ::= Leading alpabetic or underscore followed by zero or more alphabetic, numeric, or underscore characters Number ::= One or more decimal digits with optional leading - --]] -- UTILITY FUNCTIONS -- Function "treeConcat" takes a recursive list of lists in argument -- "t" and returns the corresponding parenthesized traversal string. -- A list is stored in the low positive integer indices of an -- array-style Lua table. This function is intended for formatting -- error messages, debugging, and testing. (This code was borrowed -- from the author's KILT parser utility module.) local function treeConcat(t) if type(t) ~= "table" then return tostring(t) end local res = {} for i = 1, #t do res[i] = treeConcat(t[i]) end return "(" .. table.concat(res," ") .. ")" end -- SEMANTIC DEFINITIONS AND ACTIONS -- Constants for expression tree node type tags local SUM_TYPE = "Sum" local SUB_TYPE = "Sub" local PROD_TYPE = "Prod" local DIV_TYPE = "Div" local NEG_TYPE = "Neg" local FUNC_TYPE = "Func" local VAR_TYPE = "Var" local CONST_TYPE = "Const" -- Mapping between binary operator symbols and type tags local TYPE_TAG = {} TYPE_TAG["+"] = SUM_TYPE TYPE_TAG["-"] = SUB_TYPE TYPE_TAG["*"] = PROD_TYPE TYPE_TAG["/"] = DIV_TYPE -- Semantic action functions generate the semantic representation -- Instead of creating the tables for semantic entities directly, -- these functions should call appropriate constructor functions. -- Argument t = { n } local function xNumber(n) return { CONST_TYPE, tonumber(n) } end -- Argument s = variable_name_as_string local function xVariable(s) return { VAR_TYPE, s } end -- Argument t = { negation_operator, expression } local function xNeg(t) return { NEG_TYPE, t[2] } end -- Argument t = { function_name_string, arg1, arg2, ... } local function xFunc(t) return { FUNC_TYPE, table.unpack(t) } end -- Argument t = { expr1, oper1, expr2, oper2, expr3, ... } local function xBinOp(t) if #t % 2 == 0 then error("syntax error for expression: " .. treeConcat(t)) end -- assume default binding from the left local expr = t[1] for i = 3, #t, 2 do expr = { TYPE_TAG[t[i-1]], expr, t[i] } end return expr end local xExpr = xBinOp local xTerm = xBinOp -- PARSER -- Load LPEG module local lpeg = require "lpeg" -- For convenience, define local names for LPEG functions local C, Ct, P, R, S, V = lpeg.C, lpeg.Ct, lpeg.P, lpeg.R, lpeg.S, lpeg.V -- Lexical Elements local Space = S(" \n\t")^0 local Alpha = R("az") + R("AZ") + P("_") local Num = R("09") local AlphaNum = Alpha + Num local NegSym = P("-") local Open = P("(") * Space local Close = P(")") * Space local ItemSep = P(",") * Space local Number = C(NegSym^-1 * Num^1) * Space -- capture string local Variable = C(Alpha * AlphaNum^0) * Space -- capture string local TermOp = C(S("+-")) * Space -- capture string local FactorOp = C(S("*/")) * Space -- capture string local Neg = C(NegSym) * Space -- capture string -- Grammar rules local Exp, Term, Factor, Func = V"Exp", V"Term", V"Factor", V"Func" local G = P { Exp; Exp = Ct(Term * (TermOp * Term)^0) / xExpr ; Term = Ct(Factor * (FactorOp * Factor)^0) / xTerm ; Factor = Func / xFunc + Variable / xVariable + Number / xNumber + Ct(Neg * Exp) / xNeg + (Open * Exp * Close) ; Func = Ct(Variable * Open * (Exp * (ItemSep * Exp)^0)^-1 * Close) } -- Notes: -- (1) Func must precede Variable in Factor for Func to be parseD. -- (2) Number must precede Neg Exp for negative number to be parsed. -- (3) Collects captured entities as list-style tables using Ct -- Allow leading spaces and disallow any trailing non-space characters G = Space * G * -1 -- Main parser function local function parse(s) assert(type(s) == "string","parse requires string argument.") return lpeg.match(G, s) end -- SOME TESTING print("treeConcat({CONST_TYPE, 12}) = "..treeConcat({CONST_TYPE,12})) local int370 = " 370 " print("parse(int370) = " .. treeConcat(parse(int370))) local neg9 = " -9 " print("parse(neg9) = " .. treeConcat(parse(neg9))) -- This is Neg(Const(9)) rather than Const(-9) local negs9 = " - 9 " print("parse(negs9) = " .. treeConcat(parse(negs9))) local varxyz = " xyz " print("parse(varxyz) = " .. treeConcat(parse(varxyz))) local term2 = "a + 24" print("parse(term2) = " .. treeConcat(parse(term2))) local term3 = "a + 24 - xyz" print("parse(term3) = " .. treeConcat(parse(term3))) local factor2 = "a * 24 " print("parse(factor2) = " .. treeConcat(parse(factor2))) local factor3 = "a * 24 / xyz" print("parse(factor3) = " .. treeConcat(parse(factor3))) local mix1 = " 2 + 3 * 4 * 6 " print("parse(mix1) = " .. treeConcat(parse(mix1))) local mix2 = " (xx * 2 + 3 * (y - z))" print("parse(mix2) = " .. treeConcat(parse(mix2))) local xyz0 = "xyz()" print("parse(xyz()) = " .. treeConcat(parse(xyz0))) local abc1 = "abc(1)" print("parse(abc(1)) = " .. treeConcat(parse(abc1))) local f3 = "f(a, b, 13)" print("parse(f(a, b,13)) = " .. treeConcat(parse(f3))) local negx = "-x" print("-x = " .. treeConcat(parse(negx))) local negmix2 = "-" .. mix2 print("-mix2 = " .. treeConcat(parse(negmix2)))