/* Engr 691-6, Special Topics, Software Language Engineering
   Hand-Coded Recursive Descent Parser for Fowler's Custom External 
     State Machine DSL (Secret Panel)
   H. Conrad Cunningham
   Version 1:  8 May 2009
   Version 2: 13 May 2009 
     Factored out trait IncrementalStateMachineBuilder.
     Added object RecursiveDescentParser and changed class Tokenizer 
     and class RecursiveDescentParser parameters.
     Improved error reporting and added exceptions for syntax errors.
     Added tracing.  Added Flyweights for keywords.
                          

123456789012345678901234567890123456789012345678901234567890123456789012345678

This is an implementation of Martin Fowler's Custom External DSL for
the State Machine Controller (Secret Panel) that uses a hand-coded
recursive descent parser and a lexical analyzer based on regular
expressions.  It utilizes the Syntax-Directed Translation and Embedded
Translation DSL patterns to populate the State Machine Semantic Model
for the input.

This parser is mostly new work by the author that does not use
Fowler's code from his book.  However, the trait
IncrementalStateMachineBuilder is similar to someof Fowler's code. In
its present state, it may not make use of Scala features particularly
well.  So far, program testing is not extensive.

The Custom External DSL is defined in the "Tree Construction" chapter
of Fowler's in-progress DSL book 

    http://www.martinfowler.com/dslwip/TreeConstruction.html.  

Below is Fowler's example input file from the Tree Construction
chapter of the in-progress DSL book.  Note that it differs slightly
from that used for Fowler's Delimiter-Directed Translation chapter
example (and this author's Scala example)--the actions are enclosed in
braces.

events
  doorClosed  D1CL
  drawOpened  D2OP
  lightOn     L1ON
  doorOpened  D1OP
  panelClosed PNCL
end

resetEvents
  doorOpened
end

commands
  unlockPanel PNUL
  lockPanel   PNLK
  lockDoor    D1LK
  unlockDoor  D1UL
end

state idle
  actions { unlockDoor lockPanel }
  doorClosed => active
end

state active
  drawOpened => waitingForLight
  lightOn    => waitingForDraw
end

state waitingForLight
  lightOn => unlockedPanel
end

state waitingForDraw
  drawOpened => unlockedPanel
end

state unlockedPanel
  actions { unlockPanel lockDoor }
  panelClosed => idle
end

Here is the grammar for the Custom External DSL that this program parses:

<Machine>        ::= <EventList> [<ResetEventList>] [<CommandList>] {<State>} 
<EventList>      ::= "events" {<EventDec>} "end".
<EventDec>       ::= <Identifier> <Identifier>.
<ResetEventList> ::= "resetEvents" {<Identifier>} "end".
<CommandList>    ::= "commands" {<CommandDec>} "end".
<CommandDec>     ::= <Identifier> <Identifier>.
<State>          ::= "state" <Identifier> [<ActionList>] {<Transition>} "end"
<ActionList>     ::= "actions" "{" {<Identifier>} "}".
<Transition>     ::= <Identifier> "=>" <Identifier>.

*/


import scala.io.Source
import scala.util.matching._
import scala.collection.mutable.Map
import java.io._
import Tokenizer._


/* The class RecursiveDescentParser implements an ad hoc recursive
   descent parser for the Custom External DSL for the State Machine
   Controller example. The parsing technique introduces a potentially
   recursive method to parse each nonterminal of the grammar.  It does
   so by parsing the right-side of each rule by calling the
   appropriate methods or checking syntactic details.  It make look
   ahead in the token stream to determine what to do next.  (At
   present, a lookahead of one token is sufficient.)

   This parser uses the author's Tokenizer class for the lexical
   analysis.  This class breaks the input file into a sequence of
   tokens of type Token, which may be an Identifier, a Keyword, some
   Other object, or a special token to denote that end-of-file has
   been reached.  This lexical analyzer is more powerful than the
   others we have used in this set of examples in that it
   distinguishes between keywords and identifiers. 

   The singleton object RecursiveDescentParser provides a method to
   load the DSL input file and parse it to configure a state machine.
 
*/

object RecursiveDescentParser {

  def loadFile(fileName: String): StateMachine  = {
    var dsl: Source = null
    try { dsl = Source.fromFile(fileName) }
    catch {
      case e: FileNotFoundException => 
        throw new RuntimeException(e.getMessage)
    }
    val loader = new RecursiveDescentParser(new Tokenizer(dsl))
    loader.trace("Loaded file '" + fileName + "'.")
    loader.parse
    val stateMachine = loader.getMachine
    loader.trace("Completed definition of state machine with start state '" 
                 + stateMachine.getStart.name + "'.")
    stateMachine
  }

}


class RecursiveDescentParser(tokens: Tokenizer)
  extends IncrementalStateMachineBuilder {

  // Parse the input using the Tokenizer argument
  def parse {
    parseMachine
    if (tokens.hasNext)
      syntaxError("Parsing complete but not at end of file.  Next token is " 
                  + tokens.peek + ".")
    finishMachine
  }

  // <Machine> ::= <EventList> [<ResetEventList>] [<CommandList>] {<State>} 
  private def parseMachine {
    trace("Parsing 'events' list.")
    parseEventList                             // <EventList>
    trace("Parsing 'resetEvents' list.")
    if (tokens.peek == KWresetEvents)          // [<ResetEventList>]
      parseResetEventList
    else
      trace("Empty 'resetEvents' list.")
    trace("Parsing 'commands' list.")
    if (tokens.peek == KWcommands)             // [<CommandList>]
      parseCommandList
    else
      trace("Empty 'commands' list.")
    trace("Parsing 'state' list.")
    if (tokens.peek == KWstate)                // {<State>}
      while (tokens.peek == KWstate)    
        parseState
    else
      trace("Empty 'state' definitions list.")
  }

  // <EventList> ::= "events" {<EventDec>} "end".
  private def parseEventList {
    var curTok = tokens.next
    if (curTok == KWevents) {
      while (tokens.peek != KWend && tokens.peek != EofToken)
        parseEventDec                          // {<EventDec>}
      curTok = tokens.next
      if (curTok == EofToken)
        syntaxError("End of file reached unexpectedly in 'events' block.")
    }
    else 
      syntaxError("Events block does not begin with keyword 'events'.")
  }

  // <EventDec>  ::= <Identifier> <Identifier>.
  private def parseEventDec {
    val tok1 = tokens.next
    val tok2 = tokens.next
    (tok1,tok2) match { 
      case (Identifier(n),Identifier(c)) => 
        addEvent(n,c)
        trace("..Define event '" + n + "'")
      case _ => 
        if (tok1 == EofToken || tok2 == EofToken) 
          syntaxError(
            "End of file reached unexpectedly in event declaration.")
        else
          syntaxError("Unexpected token " + tok1 + " or " + tok2 +
                      " in event declaration.")
    }
  }
 
  // <ResetEventList> ::= "resetEvents" {<Identifier>} "end".
  private def parseResetEventList {
    var curTok = tokens.next
    if (curTok == KWresetEvents) {
      curTok = tokens.next 
      while (curTok != KWend && curTok != EofToken) {
        curTok match {                         // {<Identifier>}
          case Identifier(n) => 
            addResetEvent(n)
            trace("..Define resetEvent '" + n + "'")
          case _ => 
            syntaxError("Unexpected token " + curTok + 
                        " in 'resetEvents' block.")
        }
        curTok = tokens.next
      }
    }
    else
      syntaxError(
        "ResetEvents block does not begin with keyword 'resetEvents'.")
  }
    
  // <CommandList> ::= "events" {<CommandDec>} "end".
  private def parseCommandList {
    var curTok = tokens.next
    if (curTok == KWcommands) {
      while (tokens.peek != KWend && tokens.peek != EofToken)
        parseCommandDec                        // {<CommandDec>}
      curTok = tokens.next
      if (curTok == EofToken) 
        syntaxError("Unexpected end of file in 'commands' block.")
    }
    else 
      syntaxError("Commands block does not begin with keyword 'commands'.")
  }

  // <CommandDec>  ::= <Identifier> <Identifier>.
  private def parseCommandDec{ 
    val tok1 = tokens.next
    val tok2 = tokens.next
    (tok1,tok2) match {
      case (Identifier(n),Identifier(c)) => 
        addCommand(n,c)
        trace("..Define command '" + n + "'")
      case _ => 
        if (tok1 == EofToken || tok2 == EofToken) 
          syntaxError(
            "End of file reached unexpectedly in command declaration.")
        else
          syntaxError("Unexpected token " + tok1 + " or " + tok2 + 
                      " in command declaration.")
    }
  }
 
  // <State> ::= "state" <Identifier> [<ActionList>] {<Transition>} "end"
  private def parseState {
    var curTok = tokens.next
    if (curTok == KWstate) {                   // "states" <Identifier>
      curTok = tokens.next
      curTok match {
        case Identifier(n) => 
          addState(n)
          trace("..Define state '" + n + "'")
        case _ => 
          syntaxError("State block does not have a name." +
                      "Token is " + curTok + ".")
      }
      if (curTok != EofToken) {
        if (tokens.peek == KWactions)          // [<ActionList>]
          parseActionList
        while (tokens.peek != KWend && tokens.peek != EofToken)
          parseTransition                      // {<Transition>}
        curTok = tokens.next
        if (curTok == EofToken)
          syntaxError("Unexpected end of file in 'state' block.")
      }
      else
        syntaxError("Unexpected end of file in 'state' block.")
    }
    else
      syntaxError("State block does not begin with keyword state.")
  }
          
  // <ActionList> ::= "actions" "{" {<Identifier>} "}".
  private def parseActionList {
    var curTok = tokens.next
    if (curTok == KWactions) {
      curTok = tokens.next
      if (curTok == KWleftBrace) {
        curTok = tokens.next
        while (curTok != KWrightBrace && tokens.peek != EofToken) {
          curTok match {                       // {<Identifier>}
            case Identifier(c) =>
              addAction(c)
              trace("....Add action '" + c + "'")
            case _ => 
              syntaxError("Unexpected token " + curTok + " in action list.")
          }
          curTok = tokens.next
        }
      }
      else
        syntaxError("""Actions block does not have opening brace '{'.""")
    }
    else
      syntaxError("Actions block does not begin with keyword 'actions'.")
  }

  // <Transition> ::= <Identifier> "=>" <Identifier>.
  private def parseTransition {
    val tok1 = tokens.next
    val tok2 = tokens.next
    val tok3 = tokens.next
    (tok1, tok2, tok3) match {
      case (Identifier(e),KWarrow,Identifier(d)) =>
        addTransition(e,d)
        trace("....Add transition on '" + e + "' to '" + d + "'")
      case _ => 
        if (tok1 == EofToken || tok2 == EofToken || tok3 == EofToken) 
          syntaxError(
            "End of file reached unexpectedly in transition declaration.")
        else
          syntaxError("Unexpected token in transition declaration:  " +
            tok1 + " or " + tok2 + " or " + tok3 + ".")
    }
  }
}


/* Class Tokenizer uses the regular expression API to break the input
   into a sequence of appropriate lexical tokens.  The tokens are objects
   from the Token hierarchy, which encodes the category of the token.

   This class implements a token sequence with a lookahead-one-token
   (peek) operation and a token pushback stack.  It invariantly keeps
   the next token from the input at the bottom of the stack.  To help
   with error detection and recovery, it returns a special end-of-file
   token EofToken if attempts are made to read beyond the end of the
   file.

   Object Tokenizer hold static attributes and constants related to
   the Tokenizer class.

   Note: The pushback stack was not used in the parser for the Custom
   External State Machine DSL.

   Note: The Scala regular expressions need to be examined more
   closely, in particular in recognition of special characters.
*/

object Tokenizer {

  // Lexical pattern regular expressions used
  private val lexpat = """([A-Za-z_]\w*)|(\d+)|(=>)|(\{)|(\})|(\s*)|\.""".r
  private val idpat  = """[A-Za-z_]\w*""".r 

  /* Keywords.  For each value of the argument, the parser only needs
     one Keyword object.  The following defines the Keyword constants
     and initializes the reserved word table.  It uses a variant of
     the Flyweight pattern to implement the Keyword objects.
  */

  val KWend         = Keyword("end")
  val KWevents      = Keyword("events")
  val KWresetEvents = Keyword("resetEvents")
  val KWcommands    = Keyword("commands")
  val KWstate       = Keyword("state")
  val KWactions     = Keyword("actions")
  val KWarrow       = Keyword("=>")
  val KWleftBrace   = Keyword("""{""")
  val KWrightBrace  = Keyword("""}""")

  // Reserved words table (must come after Keyword constant definitions)
  private val reserved = Map[String,Keyword](
    "end"      -> KWend,       "events"      -> KWevents, 
    "commands" -> KWcommands,  "resetEvents" -> KWresetEvents, 
    "state"    -> KWstate,     "actions"     -> KWactions, 
    """=>"""   -> KWarrow,     """{"""       -> KWleftBrace, 
    """}"""    -> KWrightBrace )
}


class Tokenizer(input: Source) {

  // Sequence of tokens come from a file
  private val tokens = for ( l <- input.getLines; t <- lexpat findAllIn l; 
                             if t.trim != "" ) yield classify(t)
                   
  // Pushback stack.  Keep next input token at bottom of stack.
  private var nextoks: List[Token] =
    if (tokens != null && tokens.hasNext) List(tokens.next) else Nil
      
  // Are there more tokens in the input?
  def hasNext: Boolean = nextoks != null && nextoks != Nil

  // Return the next token but do not remove it from the input
  def peek: Token = {
    if (nextoks != null && !nextoks.isEmpty)
      nextoks.head
    else 
      EofToken
  }

  // Return the next token and remove it from the input
  def next: Token = { 
    if (nextoks != null && !nextoks.isEmpty) {
      val tok = nextoks.head
      nextoks = nextoks.tail
      if (nextoks.isEmpty && tokens.hasNext) 
        nextoks = List(tokens.next) 
      tok
    }
    else 
      EofToken
  }
   
  // Put a token back as the first token to be returned by next or peek
  def putBack(t: Token) { 
    if (nextoks != null) 
      nextoks = t::nextoks
    else
      nextoks = List(t)
  }

  // Categorize input string token as appropriate Token object
  private def classify(str: String) = {
    if (reserved.isDefinedAt(str))
      reserved(str)
    else if (isId(str))
      Identifier(str) 
    else 
      Other(str) 
  }

  // Is the string an identifier
  private def isId(str: String) = (idpat findPrefixOf str) match {
    case Some(x) => true
    case None    => false
  }
}


/* The Token class hierarchy holds the input tokens in objects whose
   types denote their category.
*/

abstract class Token(str: String)
case class  Keyword(str: String)    extends Token(str)
case class  Identifier(str: String) extends Token(str)
case class  Other(str: String)      extends Token(str)
case object EofToken                extends Token("")


/*  Do some simple testing.  More is needed!!! */
object RecursiveDescentTest {

  def main(args: Array[String]) {
    println("\nRecursive Descent Parser External DSL test beginning.\n")
    
    // Create parser and parse DSL input file
    println("\nCreate parser and parse DSL input file.\n")
    val stateMachine = 
      RecursiveDescentParser.loadFile("CustomExternalStateMachineDSL2.dsl")
//  val stateMachine = 
//    RecursiveDescentParser.loadFile("CustomExternalStateMachineErr.dsl")

    println
    println(stateMachine)

    // Build the controller for some testing
    println("\nBuild controller.")
    val commandsChannel = new CommandChannel
    val control         = new Controller(stateMachine,commandsChannel)
    println(control.toString)

    // Execute the model a few steps
    control.handle('D1CL)
    control.handle('L1ON)
    control.handle('D2OP)
    control.handle('PNCL)

    println("\nCommands output:  " + commandsChannel.getOutput)

    println("\nCombinator Parser External DSL test ending.\n")
  }
}
