Aug 29 19:13 2013 parser_lisp.lua Page 1 --[[ Lisp Parser Module KILT -- Kamin Interpreters in Lua Toolset H. Conrad Cunningham, Professor Computer and Information Science University of Mississippi Developed for CSci 658, Software Language Engineering, Fall 2013 2013-08-19: Built Scheme module from Scheme parser code 2013-08-24: Reverted Scheme changes to create Lisp module 2013-08-29: Modified to use Opcodes and Values Factory Modules 1234567890123456789012345678901234567890123456789012345678901234567890 This parser encodes the Kamin Chapter 2 Lisp language grammar (as modified in this program) using the LPEG (Lua Parsing Expression Grammar) library. It uses function captures to build the symbol table and construct a semantic representation of the program. In this simple, Lisp-like language syntax, the semantic representation is similar to an abstract syntax tree for the expression. --]] -- Lua Parsing Expression Grammar (LPEG) text pattern-matching library local lpeg = require "lpeg" -- local names for LPEG pattern functions for convenience local P, S, R, V, C, Ct = lpeg.P, lpeg.S, lpeg.R, lpeg.V, lpeg.C, lpeg.Ct -- KILT Interpreter Opcode Global Constants local opcodes = require "opcodes" -- KILT tree utilities library local util = require "utilities" local treeConcat = util.treeConcat -- KILT Function Table Module local ftm = require "funtab" local defFun, getFun, dumpFun = ftm.defFun, ftm.getFun, ftm.dumpFun -- KILT Values module local vm = require "values" local consList, NILLEST = vm.consList, vm.NILLEST -- CONTROL OPERATOR KEYWORDS local controlop = {} -- maps external string to opcode local IF = "if" local KIf = P(IF) controlop[IF] = IFOP Aug 29 19:13 2013 parser_lisp.lua Page 2 local WHILE = "while" local KWhile = P(WHILE) controlop[WHILE] = WHILEOP local SET = "set" local KSet = P(SET) controlop[SET] = SETOP local BEGIN = "begin" local KBegin = P(BEGIN) controlop[BEGIN] = BEGINOP local DEFINE = "define" local KDefine = P(DEFINE) local ControlOp = KIf + KWhile + KSet + KBegin + KDefine -- BUILTIN VALUE OPERATOR KEYWORDS AND SYMBOLS local builtin = {} -- maps external string to opcode local PRINT = "print" local KPrint = P(PRINT) builtin[PRINT] = PRINTOP local KOp = S("+-*/=<>") builtin["+"] = ADDOP builtin["-"] = SUBOP builtin["*"] = MULOP builtin["/"] = DIVOP builtin["="] = EQOP builtin["<"] = LTOP builtin[">"] = GTOP -- BEGIN change/add value operations for Lisp local CONS = "cons" local KCons = P(CONS) builtin[CONS] = CONSOP local CAR = "car" local KCar = P(CAR) builtin[CAR] = CAROP local CDR = "cdr" local KCdr = P(CDR) builtin[CDR] = CDROP local NUMBERP = "number?" local KNumberP = P(NUMBERP) builtin[NUMBERP] = NUMBERPOP local SYMBOLP = "symbol?" Aug 29 19:13 2013 parser_lisp.lua Page 3 local KSymbolP = P(SYMBOLP) builtin[SYMBOLP] = SYMBOLPOP local LISP = "list?" local KListP = P(LISP) builtin[LISP] = LISTPOP local NULLP = "null?" local KNullP = P(NULLP) builtin[NULLP] = NULLPOP -- local BuiltIn = KOp + KPrint -- from Core local BuiltIn = KOp + KPrint + KCons + KCar + KCdr + KNumberP + KSymbolP + KListP + KNullP -- from Lisp -- END change/add for Lisp -- REPL COMMAND KEYWORDS local command = {} local QUIT = "quit" local KQuit = P(QUIT) command[QUIT] = true local Command = KQuit -- LEXICAL ELEMENTS local KComment = P( ";" ) local KOpen = P( "(" ) local KClose = P( ")" ) local Space = S(" \n\t")^0 local Alpha = R("az") + R("AZ") + P("_") -- underscore? local Num = R("09") local AlphaNum = Alpha + Num -- BEGIN change/add for Lisp -- local Special = S([[+-*/=<>,.?:'"[{]}|\-_`~!@#$%^&]]) -- not ;)( local Special = S([[+-*/=<>,.?:"[{]}|\-_`~!@#$%^&]]) -- not ;)(' local KQuote = P("'") -- END add for Lisp -- Note: The Kamin Chapter 1 Core language allows any graphic -- character except open and closed parenthesis, semicolon, and -- reserved words in function and variable names. These names cannot -- consist of only numeric digits. The definition of Id below does not -- cause failure for any builtin operations (ValueOps) or REPL -- commands. Code in the Grammar rule transformation functions handle -- this. -- The Kamin Chapter 2 Lisp and Chapter 4 Scheme languages use single -- quotes for special syntactic purposes, so we remove it from Ids. Aug 29 19:13 2013 parser_lisp.lua Page 4 -- However, for Symbols, we allow the control operators to included so -- that Lisp code can be quoted in S-expressions. local IdStart = Alpha + Special local Id = (IdStart * (AlphaNum + Special)^0) - P(ControlOp) local Name = C(Id) * Space local Integer = C(P("-")^-1 * Num^1) * Space local ValueOp = C(BuiltIn) * Space local If = C(KIf) * Space local While = C(KWhile) * Space local Set = C(KSet) * Space local Begin = C(KBegin) * Space local Define = C(KDefine) * Space local Open = KOpen * Space local Close = KClose * Space local Comment = C((1 - KComment)^0) local ICommand = Space * C(Command) -- Grammar nonterminals local Script = V("Script") local Input = V("Input") local FunDef = V("FunDef") local FunBody = V("FunBody") local ArgList = V("ArgList") local Expr = V("Expr") local ExprBody = V("ExprBody") local Operator = V("Operator") local Value = V("Value") local Variable = V("Variable") local Function = V("Function") local LamBody = V("LamBody") -- BEGIN add for Lisp local QuotedC = V("QuotedC") local SExpr = V("SExpr") local SExprBody = V("SExprBody") local Symbol = V("Symbol") -- END add for Lisp -- STATIC SEMANTICS FUNCTIONS -- These are LPEG capture transformation functions that are called -- from within the LPEG grammar rules. They take the "captures" from -- the LPEG parsing and construct the semantic model for the Script. -- Static semantics function "xDefine" takes the input "t" -- collected by the "define" function definition grammar rule Aug 29 19:13 2013 parser_lisp.lua Page 5 -- Ct(Define * Function * ArgList * Expr) and returns the -- semantic form {FUNDEF, name #arguments, argument-list, body}. -- As a SIDE EFFECT, it also adds the function definition to -- the Function Definition table. local function xDefine(t) if lpeg.match(ValueOp * -1,t[2]) then error("Attempt to redefine builtin "..tostring(t[2]) .. ".",2) else -- parser disallows ControlOps already local name, params, body = t[2], t[3], t[4] defFun(name, #params, params, body) -- side effect return {FUNDEF,treeConcat(name)} end end -- Static semantics function "xVarVal" takes the input "t" collected -- from the variable reference grammar rule Ct(Variable) and returns -- the corresponding semantic form {VARVAL, variable-name}. local function xVarVal(t) return {VARVAL,t[1]} end -- Static semantics function "xIf" takes the input "t" collected from -- the "if" control operation grammar rule Ct(If * Expr * Expr * Expr) -- and returns the corresponding semantic form {IFOP, test, then, -- else). local function xIf(t) t[1] = IFOP return t end -- Static semantics function "xWhile" takes the input "t" collected -- from the "while" control operation grammar rule Ct(While * Expr * -- Expr) and returns the corresponding semantic form {WHILEOP, test, -- body}. local function xWhile(t) t[1] = WHILEOP return t end -- Static semantics function "xSet" takes the input "t" collected from -- the "set" control operation grammar rule Ct(Set * Variable * Expr) -- and returns the correspnding semantic form {SETOP, variable-name, -- expression}. local function xSet(t) if lpeg.match(Command * -1,t[2]) then error("Attempt to use REPL command " .. tostring(t[2]) .. " for a variable name.", 2) elseif lpeg.match(ValueOp * -1,t[2]) then Aug 29 19:13 2013 parser_lisp.lua Page 6 error("Attempt to use builtin value operator "..tostring(t[2]) .. " for a variable name.", 2) end -- grammar disallows ControlOps already t[1] = SETOP return t end -- Static semantics function "xBegin" takes the input "t" collected -- from the "begin" control operation grammar rule Ct(Begin * Expr^1) -- and returns the corresponding semantic form {BEGINOP, expr1, ... }. -- Argument t from Ct(Begin * Expr^1) local function xBegin(t) t[1] = BEGINOP return t end -- Static semantics function "xApply" takes the input "t" collected -- from the the operation application grammar rule -- Ct(Operator Expr^0) and returns the corresponding semantic -- form {APPLYOP, operator, arg1, ..., argn) or {APPLYFUN, -- function-name, arg1, ..., argn} depending upon whether -- the operator is a builtin value operator or a user-defined -- function name. -- Argument t from local function xApply(t) if lpeg.match(ValueOp * -1,t[1]) then table.insert(t,1,APPLYOP) t[2] = builtin[t[2]] else -- should we check to see if function defined? table.insert(t,1,APPLYFUN) end return t end -- Static semantics function "xNumVal" takes input collected by the -- application grammar rule C(Integer) and returns the corresponding -- semantic form {NUMSXP, number}. -- -- semantic form {NUMVAL, number}. local function xNumVal(s) -- BEGIN change for Lisp -- return {NUMVAL,tonumber(s)} -- number conversion return {NUMSXP,tonumber(s)} -- number conversion -- END change for Lisp end -- BEGIN add for Lisp -- Static semantics function "xSExprSym" takes input collected by the -- symbol S-expression grammar rule C(SExprSym) and returns the Aug 29 19:13 2013 parser_lisp.lua Page 7 -- corresponding semantic form {SYMSXP, name}. local function xSExprSym(t) return {SYMSXP, t[1]} end -- Static semantics function "xSExprList" takes input collected by the -- quoted S-expression grammar rule Ct(SExpr^0) and returns the -- corresponding semantic form {NILSXP} for empty lists or {LISTSXP, -- cons-cell} for nonempty lists. A cons-cell has car (head element) -- and cdr (tail list) fields. local function xSExprList(t) return consList(t) end -- END add for Lisp -- LISP GRAMMAR RULES -- This table of rules has embedded calls to the static sematnics -- functions using the LPEG capture feature. -- The Kamin grammar starts with Input local grammarRules = { Script, Script = Space * Ct(Input^1), Input = Expr + FunDef, FunDef = Open * FunBody * Close, FunBody = Ct(Define * Function * ArgList * Expr) / xDefine, ArgList = Open * Ct(Variable^0) * Close, Expr = Value + Ct(Variable) / xVarVal + (Open * ExprBody * Close), ExprBody = Ct(If * Expr * Expr * Expr) / xIf + Ct(While * Expr * Expr) / xWhile + Ct(Set * Variable * Expr) / xSet + Ct(Begin * Expr^1) / xBegin + Ct(Operator * Expr^0) / xApply, Operator = Function + ValueOp, Value = C(Integer) / xNumVal + QuotedC, Ct(Operator * Expr^0) / xApply, Operator = Function + ValueOp, -- BEGIN change/add for Lisp -- Value = C(Integer) / xNumVal, Value = C(Integer) / xNumVal + QuotedC, QuotedC = KQuote * SExpr, SExpr = C(Integer) / xNumVal + Ct(Symbol) / xSExprSym + (Open * SExprBody * Close), SExprBody= Ct(SExpr^0) / xSExprList, Symbol = Name + C(ControlOp) * Space, Aug 29 19:13 2013 parser_lisp.lua Page 8 -- END change/add for Lisp Function = Name, Variable = Name } -- LPEG pattern for the grammar local grammar = P(grammarRules) -- Function "parse" parses the input string "s" and returns the -- corresponding abstract syntax tree (AST). local function parse(s) local t = lpeg.match(grammar, s) if not t then error("Syntax error.", 2) end return t end -- Function "trimSpaces" removes all spaces from the beginning of -- string "line" and returns the resulting string. local function trimSpaces(line) local nonspace = lpeg.match(Space,line) return string.sub(line,nonspace,-1) end -- Function "trimComment" removes the comment from the end of string -- "line and returns the line. local function trimComment(line) return lpeg.match(Comment,line) end -- Function "getCommand" retrieves a REPL command from the beginning -- of the string "input" and returns the command. local function getCommand(input) return lpeg.match(ICommand,input) end -- MODULE EXPORT LIST return { parse = parse, trimSpaces = trimSpaces, trimComment = trimComment, getCommand = getCommand, controlop = controlop, builtin = builtin, command = command }