-- H. Conrad Cunningham
-- Dept. of Computer and Info. Science, U. of Mississippi
-- CSci 555, Fall 1994, Homework #3
-- A recognizer for simple mathematical expressions
-- 14 November 1994

-- BNF for expression
--	expression ::= term    |  term addOp expression
--	term	   ::= factor  |  factor mulOp term
--	factor	   ::= number  |  identifier  |  ( expression )

-- Lexical symbols
--	addOp;	    char "+" or "-"
--	mulOp:	    char "*" or "/"
--	identifier: first char alphabetic, all alphanumerics and
--		    underscores that follow    
--	number:	    first char numeric, all numerics that follow
--	space:	    space characters (blanks, tabs, newlines, etc.)
--		    occur anywhere except in identifiers and numbers

type Token = String

-- For testing purposes, function "test" takes a string and shows
-- both the "Bool" and "[Token]" returns from "expr".

test :: String -> (Bool,[Token])
test xs = expr (lex xs)

-- Function "valid" takes a string and returns "True" if and only if
-- the string is an acceptable expression.

valid :: String -> Bool
valid xs = exprOk && rest == []
	   where (exprOk,rest) = expr (lex xs)

-- Function "lex" takes a string and returns the corresponding list of
-- lexical tokens.  Except for spaces, identifiers, and numbers, each
-- character is considered a token.

lex :: String -> [Token]
lex []		    = []
lex xs@(x:xs')
	| isSpace x = lex xs'
	| isAlpha x = let (id,rest) = span p xs
			  p z       = isAlphanum z || z == '_'
		      in  id : lex rest
	| isDigit x = let (num,rest) = span isDigit xs
		      in num : lex rest
	| otherwise = [x] : lex xs'

-- Function "expr" takes a token list and returns a tuple.  The first
-- component of the tuple is "True" if and only if an expression is
-- recognized at the beginning of the token list.  If the first
-- component is "True", then the second component is the token list
-- remaining after the expression is removed.  Otherwise, the second
-- component is the token list remaining at the point an error is discovered.

expr :: [Token] -> (Bool,[Token])
expr []				= (False,[])
expr xs
	| rest == []		= (termOk,[])
	| termOk && next == ")" = (True,rest)
	| termOk && addOk next	= expr aft
	| otherwise		= (False,rest)
	  where (termOk,rest) = term xs
		(next:aft)    = rest

-- Function "term" takes a token list and returns a tuple.  The first
-- component of the tuple is "True" if and only if a term is recognized at
-- the beginning of the token list.  If the first component is "True", then
-- the second component is the token list remaining after the term is
-- removed.   Otherwise, the second component is the token list remaining
-- at the point an error is discovered.

term :: [Token] -> (Bool,[Token])
term []				  = (False,[])
term xs
	| rest == []		  = (factorOk,[])
	| factorOk && next == ")" = (True,rest)
	| factorOk && addOk next  = (True,rest)
	| factorOk && mulOk next  = term aft
	| otherwise		  = (False,rest)
	  where (factorOk,rest) = factor xs
		(next:aft)	= rest

-- Function "factor" takes a token list and returns a tuple.  The
-- first component of the tuple is "True" if and only if a factor is
-- recognized at the beginning of the token list.  If the first component
-- is "True", then the second component is the token list remaining after
-- the factor is removed.  Otherwise, the second component is the token
-- list remaining at the point an error is discovered.  Function "factor2"
-- recognizes a nested  expression and its closing parenthesis.

factor :: [Token] -> (Bool,[Token])
factor []	    = (False,[])
factor xs@(x:xs')
	| ident x   = (True,xs')
	| numb x    = (True,xs')
	| x == "("  = factor2 xs'
	| otherwise = (False,xs)

factor2 :: [Token] -> (Bool,[Token])
factor2 []			= (False,[])
factor2 xs 
	| rest == []		= (False,[])
	| exprOk && next == ")" = (True,aft)
	| otherwise		= (False,rest)
	  where (exprOk,rest) = expr xs
		(next:aft)    = rest

-- Functions "ident", "numb", "addOk", and "mulOk" take a token and
-- return "True" if and only if the token is an identifier, number,
-- addOp, or mulOp, respectively.  Functions "ident" and "numb" assume
-- that "lex" has worked correctly.

ident :: Token -> Bool
ident []     = False
ident (x:xs) = isAlpha x

numb :: Token -> Bool
numb []	     = False
numb (x:xs)  = isDigit x

addOk :: Token -> Bool
addOk x      = (x == "+" || x == "-")

mulOk :: Token -> Bool
mulOk x      = (x == "*" || x == "/")
