{ (* Lexer for a simple functional language (uML) sestoft@dina.kvl.dk * 2001-02-21 *) open Lexing Funpar; exception LexicalError of string * int * int (* (message, loc1, loc2) *) fun lexerError lexbuf s = raise LexicalError (s, getLexemeStart lexbuf, getLexemeEnd lexbuf); val commentStart = ref 0; (* Start of outermost comment being scanned *) fun commentNotClosed lexbuf = raise LexicalError ("Comment not terminated", !commentStart, getLexemeEnd lexbuf); val commentDepth = ref 0; (* Current comment nesting *) (* Distinguish keywords from identifiers using function `keyword' below: *) local fun addkwd ((kwd, tok), bmap) = Binarymap.insert(bmap, kwd, tok) val keywords = List.foldr addkwd (Binarymap.mkDict String.compare) [("else", ELSE), ("end", END), ("false", CSTBOOL false), ("if", IF), ("in", IN), ("let", LET), ("not", NOT), ("then", THEN), ("true", CSTBOOL true)] in fun keyword s = case Binarymap.peek(keywords, s) of SOME tok => tok | NONE => NAME s end } rule Token = parse [` ` `\t` `\n` `\r`] { Token lexbuf } | `~`?[`0`-`9`]+ { case Int.fromString (getLexeme lexbuf) of NONE => lexerError lexbuf "internal error" | SOME i => CSTINT i } | [`a`-`z``A`-`Z`][`a`-`z``A`-`Z``0`-`9`]* { keyword (getLexeme lexbuf) } | "(*" { commentStart := getLexemeStart lexbuf; commentDepth := 1; SkipComment lexbuf; Token lexbuf } | `&` { ANDALSO } | "|" { ORELSE } | `=` { EQ } | "<>" { NE } | `>` { GT } | `<` { LT } | ">=" { GE } | "<=" { LE } | `+` { PLUS } | `-` { MINUS } | `*` { TIMES } | `/` { DIV } | `%` { MOD } | `(` { LPAR } | `)` { RPAR } | eof { EOF } | _ { lexerError lexbuf "Illegal symbol in input" } and SkipComment = parse "*)" { commentDepth := !commentDepth - 1; if !commentDepth = 0 then () else SkipComment lexbuf } | "(*" { commentDepth := !commentDepth + 1; SkipComment lexbuf } | (eof | `\^Z`) { commentNotClosed lexbuf } | _ { SkipComment lexbuf } ;