{- H. Conrad Cunningham
   Dept. of Computer and Info. Science, U. of Mississippi
   CSci 555, Fall 1994, Homework #3
   A recognizer for simple mathematical expressions
   14 November 1994
   Revised 8 February 2006
   Revised 16 September 2010

123456789012345678901234567890123456789012345678901234567890123456789012345678

   This program implements a simple recursive descent recognizer for
   the following grammar.

   BNF for expression
  	expression ::= term    |  term addOp expression
  	term	   ::= factor  |  factor mulOp term
  	factor	   ::= number  |  identifier  |  ( expression )

   Lexical symbols
  	addOp;	    char "+" or "-"
  	mulOp:	    char "*" or "/"
  	identifier: first char alphabetic, all alphanumerics and
  		    underscores that follow    
  	number:	    first char numeric, all numerics that follow
  	space:	    space characters (blanks, tabs, newlines, etc.)
  		    occur anywhere except in identifiers and numbers
-}

import Char

type Token = String

{- For testing purposes, function "test" takes a string and shows
   both the "Bool" and "[Token]" returns from "expr".
-}
test :: String -> IO()
test xs = putStrLn ("\"" ++ xs ++ "\" ==> " ++ show (expr (lex' xs)))

{- Function "valid" takes a string and returns "True" if and only if
   the string is an acceptable expression.
-}
valid :: String -> Bool
valid xs = exprOk && rest == []
	   where (exprOk,rest) = expr (lex' xs)

{- Function "lex'" takes a string and returns the corresponding list of
   lexical tokens.  Except for spaces, identifiers, and numbers, each
   character is considered a token.
-}
lex' :: String -> [Token]
lex' []		    = []
lex' xs@(x:xs')
	| isSpace x = lex' xs'
	| isAlpha x = let (id,rest) = span p xs
			  p z       = isAlphaNum z || z == '_'
		      in  id : lex' rest
	| isDigit x = let (num,rest) = span isDigit xs
		      in num : lex' rest
	| otherwise = [x] : lex' xs'

{- Function "expr" takes a token list and returns a tuple.  The first
   component of the tuple is "True" if and only if an expression is 
   recognized at the beginning of the token list.  If the first 
   component is "True", then the second component is the token list 
   remaining after the expression is removed.  Otherwise, the second 
   component is the token list remaining at the point an error is
   discovered.
-}
expr :: [Token] -> (Bool,[Token])
expr []				= (False,[])
expr xs
	| rest == []		= (termOk,[])
	| termOk && next == ")" = (True,rest)
	| termOk && addOk next	= expr aft
	| otherwise		= (False,rest)
	  where (termOk,rest) = term xs
		(next:aft)    = rest

{- Function "term" takes a token list and returns a tuple.  The first
   component of the tuple is "True" if and only if a term is recognized at
   the beginning of the token list.  If the first component is "True", then
   the second component is the token list remaining after the term is
   removed.   Otherwise, the second component is the token list remaining
   at the point an error is discovered.
-}
term :: [Token] -> (Bool,[Token])
term []				  = (False,[])
term xs
	| rest == []		  = (factorOk,[])
	| factorOk && next == ")" = (True,rest)
	| factorOk && addOk next  = (True,rest)
	| factorOk && mulOk next  = term aft
	| otherwise		  = (False,rest)
	  where (factorOk,rest) = factor xs
		(next:aft)	= rest

{- Function "factor" takes a token list and returns a tuple.  The
   first component of the tuple is "True" if and only if a factor is
   recognized at the beginning of the token list.  If the first component
   is "True", then the second component is the token list remaining after
   the factor is removed.  Otherwise, the second component is the token
   list remaining at the point an error is discovered.  Function "factor2"
   recognizes a nested  expression and its closing parenthesis.
-}
factor :: [Token] -> (Bool,[Token])
factor []	    = (False,[])
factor xs@(x:xs')
	| ident x   = (True,xs')
	| numb x    = (True,xs')
	| x == "("  = factor2 xs'
	| otherwise = (False,xs)

factor2 :: [Token] -> (Bool,[Token])
factor2 []			= (False,[])
factor2 xs 
	| rest == []		= (False,[])
	| exprOk && next == ")" = (True,aft)
	| otherwise		= (False,rest)
	  where (exprOk,rest) = expr xs
		(next:aft)    = rest

{- Functions "ident", "numb", "addOk", and "mulOk" take a token and
   return "True" if and only if the token is an identifier, number,
   addOp, or mulOp, respectively.  Functions "ident" and "numb" assume
   that "lex' " has worked correctly.
-}
ident :: Token -> Bool
ident []     = False
ident (x:xs) = isAlpha x

numb :: Token -> Bool
numb []	     = False
numb (x:xs)  = isDigit x

addOk :: Token -> Bool
addOk x      = (x == "+" || x == "-")

mulOk :: Token -> Bool
mulOk x      = (x == "*" || x == "/")

-- Partial Testing of the package

test00 = test ""
test01 = test "x3"
test02 = test "3"
test03 = test "xx+ 1"
test04 = test "xy * 3"
test05 = test "(x - 0 *( 3 +x2))"
test06 = test "(x - 0 *( 3 +x2) "
test07 = test ")*(&)("
test08 = test "x1xd!"

testall = do
            test00
            test01
            test02
            test03
            test04
            test05
            test06
            test07
            test08

