{
(* Lexer for uSQL, very simple SQL SELECT statements.
   sestoft@dina.kvl.dk * 2001-02-22

   This is a mosmllex specification.  The header between the outer braces
   is plain SML; the rules after it define three entry points:
     Token          scans one token and returns a Sqlpar token value
     SkipToEndLine  consumes the rest of a line comment
     String         scans the body of a string constant into a buffer
*)

open Lexing Sqlpar;

exception LexicalError of string * int * int (* (message, loc1, loc2) *)

(* Raise LexicalError with message s, located at the current lexeme. *)
fun lexerError lexbuf s =
    raise LexicalError (s, getLexemeStart lexbuf, getLexemeEnd lexbuf);

(* Distinguish keywords from identifiers using the function keyword below.
   The table is keyed on lowercase spellings; the Token rule lowercases the
   lexeme before the lookup, so keywords are matched case-insensitively and
   every NAME carries a lowercased string. *)
local
    fun addkwd ((kwd, tok), bmap) = Binarymap.insert(bmap, kwd, tok)
    val keywords =
        List.foldr addkwd (Binarymap.mkDict String.compare)
        [("and",    AND),
         ("false",  CSTBOOL false),
         ("from",   FROM),
         ("not",    NOT),
         ("or",     OR),
         ("select", SELECT),
         ("true",   CSTBOOL true)]
in
    (* Return the keyword token for s, or NAME s if s is not a keyword. *)
    fun keyword s =
        case Binarymap.peek(keywords, s) of
            SOME tok => tok
          | NONE     => NAME s
end

(* For scanning strings and comments *)
local
    (* Start position of the construct currently being scanned. *)
    val savedLexemeStart = ref 0
in
    (* Remember where the current lexeme starts; called when a multi-lexeme
       construct (a string constant) is opened. *)
    fun resetLexerState lexbuf =
        (savedLexemeStart := getLexemeStart lexbuf)

    (* Report that the construct named by kind was still open at end of
       input.  The reported range runs from the saved start position to the
       end of the current lexeme.  The saved position must NOT be reset
       here: doing so would replace the start of the construct by the
       position of the end-of-input lexeme. *)
    fun notTerminated kind lexbuf =
        raise LexicalError (kind ^ " not terminated",
                            !savedLexemeStart, getLexemeEnd lexbuf)
end

(* A string constant is scanned as a list of characters, kept in reverse
   order.  To handle very long string constants, use a CharArray instead,
   and extend it dynamically (by doubling it when too small). *)
local
    val string_buff = ref [] : char list ref
in
    (* Empty the buffer and record the start position of the string. *)
    fun reset_string_buffer lexbuf =
        (resetLexerState lexbuf;
         string_buff := [])

    (* Append one character to the string being scanned. *)
    fun store_string_char c =
        (string_buff := c :: !string_buff)

    (* Decode the current lexeme as a C-style escape and append the
       resulting character. *)
    fun store_c_escape lexbuf =
        case Char.fromCString (getLexeme lexbuf) of
            NONE   => lexerError lexbuf "illegal escape sequence"
          | SOME c => store_string_char c

    (* The characters stored so far, in scanning order. *)
    fun get_string() =
        String.implode (List.rev (!string_buff))
end
}

(* Main entry point: skip white space and line comments, return one token. *)
rule Token = parse
    [` ` `\t` `\n` `\r`]
      { Token lexbuf }
  | [`0`-`9`]+
      { (* Int.fromString raises Overflow on a too long digit sequence;
           report that as a lexical error with a position. *)
        (case Int.fromString (getLexeme lexbuf) of
             NONE   => lexerError lexbuf "internal error"
           | SOME i => CSTINT i)
        handle Overflow => lexerError lexbuf "integer constant too large" }
  | [`a`-`z``A`-`Z``_`][`a`-`z``A`-`Z``_``0`-`9`]*
      { keyword (String.map Char.toLower (getLexeme lexbuf)) }
  | `+`   { PLUS }
  | `-`   { MINUS }
  | `*`   { TIMES }
  | `/`   { DIV }
  | `%`   { MOD }
  | "="   { EQ }
  | "<>"  { NE }
  | `>`   { GT }
  | `<`   { LT }
  | ">="  { GE }
  | "<="  { LE }
  | `(`   { LPAR }
  | `)`   { RPAR }
  | `,`   { COMMA }
  | `.`   { DOT }
  | "--"
      { SkipToEndLine lexbuf; Token lexbuf }
  | `'`
      { (* Start from an empty buffer and remember where the string begins;
           without the reset every string constant would also contain the
           characters of all previously scanned ones. *)
        reset_string_buffer lexbuf;
        String lexbuf;
        CSTSTRING (get_string()) }
  | eof
      { EOF }
  | _
      { lexerError lexbuf "Illegal symbol in input" }

(* Consume the remainder of a line comment, up to and including the first
   line break, or up to end of input. *)
and SkipToEndLine = parse
    [`\n` `\r`]
      { () }
  | (eof | `\^Z`)
      { () }
  | _
      { SkipToEndLine lexbuf }

(* Scan the body of a string constant after its opening quote, storing the
   characters in the string buffer.  Returns unit at the closing quote. *)
and String = parse
    `'`
      { () }
  | `\\` [`\\` `"` `a` `b` `t` `n` `v` `f` `r`]
      { store_c_escape lexbuf; String lexbuf }
  | "''"
      { (* A doubled quote stands for one quote character. *)
        (store_string_char #"'"; String lexbuf) }
  | `\\`
      { lexerError lexbuf "illegal escape sequence" }
  | (eof | `\^Z`)
      { notTerminated "string" lexbuf }
  | [`\n` `\r`]
      { lexerError lexbuf "newline not permitted in string" }
  | [`\^A`-`\^Z` `\127` `\255`]
      { lexerError lexbuf "invalid character in string" }
  | _
      { (store_string_char(getLexemeChar lexbuf 0); String lexbuf) }
;