(* A continuation-based (backwards) compiler from parts of micro-C, a fraction of the C language, to Java Virtual Machine code. sestoft@dina.kvl.dk * 2001-04-04, 2002-04-15 To compile this compiler and use it: * compile the micro-C lexer and parser specifications * download and unpack the precompiled SML-JVM toolkit in ../jvm * mosml -P full -I .. -I ../jvm parse.sml jvmcomp.sml * jvmcompile2file (parsefile "ex11.c") "Out"; * java Out ... The source language is a restricted version of micro-C. The compilation of a block { ... }, which may contain a mixture of declarations and statements, proceeds in two passes: Pass 1: elaborate declarations to find the environment in which each statement must be compiled; also translate declarations into allocation instructions, of type bstmtordec. Pass 2: compile the statements in the given environments. *) app load ["Absyn", "Env", "Bytecode", "Classdecl", "Stackdepth", "Classfile"]; open Absyn Env Bytecode; type instrs = jvm_instr list (* The intermediate representation between passes 1 and 2 above: *) datatype bstmtordec = BDec of instrs (* Declaration of local variable *) | BStmt of stmt (* A statement *) (* ------------------------------------------------------------------- *) (* Generate a fresh label (a JVM pseudo-instruction): *) local val nextlab = ref Label.freshLabels : Label.labels ref in fun resetLabels () = nextlab := Label.freshLabels fun newLabel () = let val (labels, lab) = Label.newLabel (!nextlab) in nextlab := labels; lab end end; (* ------------------------------------------------------------------- *) (* Code-generating functions that perform local optimizations *) fun addPop C : instrs = Jpop :: C fun addLabel C : Label.label * instrs = case C of Jlabel lab :: _ => (lab, C) | Jgoto lab :: _ => (lab, C) | _ => let val lab = newLabel() in (lab, Jlabel lab :: C) end fun makeJump C : jvm_instr * instrs = case C of Jreturn :: _ => (Jreturn, C) | Jlabel lab :: Jreturn :: _ => (Jreturn, C) | Jlabel lab :: _ => (Jgoto lab, C) | Jgoto lab :: _ => (Jgoto lab, C) | _ => let val lab = newLabel() in (Jgoto lab, Jlabel lab :: C) end fun deadcode C = case C of [] => [] | Jlabel lab :: _ => C | _ :: C1 => deadcode C1 fun addNot C = case C of Jifeq lab :: C1 => Jifne lab :: C1 | Jifne lab :: C1 => Jifeq lab :: C1 | _ => intConst 1 :: Jixor :: C fun addJump jump C = (* jump is Jgoto or Jreturn *) let val C1 = deadcode C in case (jump, C1) of (Jgoto lab1, Jlabel lab2 :: _) => if lab1=lab2 then C1 else Jgoto lab1 :: C1 | _ => jump :: C1 end; fun addGoto lab C = addJump (Jgoto lab) C fun addCst i C = case (i, C) of (0, Jiadd :: C1) => C1 | (0, Jisub :: C1) => C1 | (1, Jimul :: C1) => C1 | (1, Jidiv :: C1) => C1 | (_, Jpop :: C1) => C1 | (0, Jifeq lab :: C1) => addGoto lab C1 | (_, Jifeq lab :: C1) => C1 | (0, Jifne lab :: C1) => C1 | (_, Jifne lab :: C1) => addGoto lab C1 | _ => intConst i :: C; (* ------------------------------------------------------------------- *) (* The environment keeps track of variable locations (in the JVM localvars) *) type venv = (string, Localvar.index * typ) env (* compile-time environment *) (* Generate a fresh JVM local variable *) local val localsref = ref Localvar.freshLocals : Localvar.locals ref in fun setLocals locals = (localsref := locals) fun getLocals () = !localsref fun newLocal1 () = let val (locals, loc) = Localvar.nextVar1 (!localsref) in localsref := locals; loc end end; (* Bind declared variable in env and generate code to allocate it: *) fun allocate (typ, x) env : venv * instrs = let val (loc, code) = case typ of TypA (t, SOME i) => let val loc = newLocal1 () in (loc, addCst i [Jnewarray {elem = Jvmtype.Tint, dim = 1}, Jastore loc]) end | TypA (t, e) => raise Fail "allocate: dynamic array size" | _ => (newLocal1 (), []) in (bind1 env (x, (loc, typ)), code) end (* Bind declared parameter in env: *) fun bindparam (typ, x) env : Localvar.index * venv = let val loc = newLocal1 () in (loc, bind1 env (x, (loc, typ))) end; fun bindparams [] env : Localvar.index list * venv = ([], env) | bindparams (param1::paramr) env = let val (idx1, env1) = bindparam param1 env val (idxr, envr) = bindparams paramr env1 in (idx1::idxr, envr) end (* ------------------------------------------------------------------- *) (* The static methods InOut.printi and InOut.printc, used in cExpr: *) val inoutClass = Jvmtype.class { pkgs=[], name = "InOut" } val printi = { class = inoutClass, name = "printi", msig = ([Jvmtype.Tint], NONE) } val printc = { class = inoutClass, name = "printc", msig = ([Jvmtype.Tchar], NONE) } (* The static method Integer.parseInt, used in cProgram: *) val integerClass = Jvmtype.class { pkgs=["java", "lang"], name = "Integer" } val stringClass = Jvmtype.class { pkgs=["java", "lang"], name = "String"} val parseInt = { class = integerClass, name = "parseInt", msig = ([Jvmtype.Tclass stringClass], SOME Jvmtype.Tint) } (* ------------------------------------------------------------------- *) (* Compiling micro-C statements *) fun cStmt stmt (env : venv) (C : instrs) : instrs = case stmt of If(e, stmt1, stmt2) => let val (jumpend, C1) = makeJump C val (labelse, C2) = addLabel (cStmt stmt2 env C1) in cExpr e env (Jifeq labelse :: cStmt stmt1 env (addJump jumpend C2)) end | While(e, body) => let val labbegin = newLabel() val (jumptest, C1) = makeJump (cExpr e env (Jifne labbegin :: C)) in addJump jumptest (Jlabel labbegin :: cStmt body env C1) end | Expr e => cExpr e env (addPop C) | Block stmts => let fun pass1 [] env = [] | pass1 (s1::sr) env = let val res1 as (_, env1) = bStmtordec s1 env val resr = pass1 sr env1 in (res1 :: resr) end val stmtsback = pass1 stmts env fun pass2 [] C = C | pass2 ((BDec code, env) :: sr) C = code @ pass2 sr C | pass2 ((BStmt stmt, env) :: sr) C = cStmt stmt env (pass2 sr C) in pass2 stmtsback C end | Return _ => raise Fail "return not implemented" and bStmtordec (Stmt stmt) env : bstmtordec * venv = (BStmt stmt, env) | bStmtordec (Dec (typ, x)) env = let val (env1, code) = allocate (typ, x) env in (BDec code, env1) end (* Compiling micro-C expressions: * e is the expression to compile * env is the compile-time environment * C is the code continuation; the code following this instruction Net effect principle: if the compilation (cExpr e env C) of expression e returns the instruction sequence instrs, then the execution of instrs will have the same effect as an instruction sequence that first computes the value of expression e on the stack top and then executes C (but instrs may actually achieve this in a different way because of optimizations). When compiling for the JVM, the net effect principle requires a dup before some operations (assignment and print) that consume the value of the stack. When compiling for the JVM, the only permitted access expressions in expressions and assignments are variables x (AccVar) and array indexing a[e] (AccIndex); pointer accesses are not allowed. *) and cExpr (e : expr) (env : venv) (C : instrs) : instrs = case e of Access(AccVar x) => Jiload (cAccess x env) :: C | Access(AccIndex(AccVar x, ei)) => Jaload (cAccess x env) :: cExpr ei env (Jiaload :: C) | Access _ => raise Fail "pointer dereferencing * not implemented" | Assign(AccVar x, erhs) => addDup (cExpr erhs env []) Jdup [Jistore (cAccess x env)] C | Assign(AccIndex(AccVar x, ei), erhs) => addDup (Jaload (cAccess x env) :: cExpr ei env (cExpr erhs env [])) Jdup_x2 [Jiastore] C | Assign _ => raise Fail "unimplemented assignment" | Cst (CstI i) => addCst i C | Cst CstN => addCst 0 C | Addr acc => raise Fail "address operator (&) not implemented" | Prim1(ope, e1) => cExpr e1 env (case ope of "!" => addNot C | "printi" => addDup [] Jdup [Jinvokestatic printi] C | "printc" => addDup [] Jdup [Jinvokestatic printc] C | _ => raise Fail "unknown primitive 1") | Prim2(ope, e1, e2) => (case ope of "==" => mknumtest env e1 e2 EQUAL true C | "!=" => mknumtest env e1 e2 EQUAL false C | "<" => mknumtest env e1 e2 LESS true C | ">=" => mknumtest env e1 e2 LESS false C | ">" => mknumtest env e1 e2 GREATER true C | "<=" => mknumtest env e1 e2 GREATER false C | _ => (cExpr e1 env (cExpr e2 env (case ope of "*" => Jimul :: C | "+" => Jiadd :: C | "-" => Jisub :: C | "/" => Jidiv :: C | "%" => Jirem :: C | _ => raise Fail "unknown primitive 2")))) | Andalso(e1, e2) => (case C of Jifeq lab :: _ => cExpr e1 env (Jifeq lab :: cExpr e2 env C) | Jifne labthen :: C1 => let val (labelse, C2) = addLabel C1 in cExpr e1 env (Jifeq labelse :: cExpr e2 env (Jifne labthen :: C2)) end | _ => let val (jumpend, C1) = makeJump C val (labfalse, C2) = addLabel (addCst 0 C1) in cExpr e1 env (Jifeq labfalse :: cExpr e2 env (addJump jumpend C2)) end) | Orelse(e1, e2) => (case C of Jifne lab :: _ => cExpr e1 env (Jifne lab :: cExpr e2 env C) | Jifeq labthen :: C1 => let val (labelse, C2) = addLabel C1 in cExpr e1 env (Jifne labelse :: cExpr e2 env (Jifeq labthen :: C2)) end | _ => let val (jumpend, C1) = makeJump C val (labtrue, C2) = addLabel(addCst 1 C1) in cExpr e1 env (Jifne labtrue :: cExpr e2 env (addJump jumpend C2)) end) | Call(f, es) => raise Fail "Call not implemented" (* Don't duplicate the value if it will be discarded anyway *) and addDup code1 dup code2 C = case C of Jpop :: C1 => code1 @ code2 @ C1 | _ => code1 @ dup :: code2 @ C (* Generate code for an integer comparison. Very cumbersome in the JVM, although this could probably be simplified: *) and mknumtest env e1 e2 cmp sign C = let (* Tests that compare one stack element with zero: *) val i1test = ((Jiflt, Jifge), (Jifeq, Jifne), (Jifgt, Jifle)) (* Tests that compare two stack elements with each other: *) val i2test = ((Jif_icmplt, Jif_icmpge), (Jif_icmpeq, Jif_icmpne), (Jif_icmpgt, Jif_icmple)) (* Reverse operand order in a test: *) fun rev LESS = GREATER | rev EQUAL = EQUAL | rev GREATER = LESS (* Choose the right comparison instruction: *) fun select ((lt, ge), (eq, ne), (gt, le)) ordr b = case ordr of LESS => if b then lt else ge | EQUAL => if b then eq else ne | GREATER => if b then gt else le (* Optimize if one of the operands is zero: *) fun genTest b lbl C2 = case (e1, e2) of (_, Cst (CstI 0)) => cExpr e1 env (select i1test cmp b lbl :: C2) | (Cst (CstI 0), _) => cExpr e2 env (select i1test (rev cmp) b lbl :: C2) | _ => cExpr e1 env (cExpr e2 env (select i2test cmp b lbl :: C2)) in (* Generate a conditional jump if possible, else push 0 or 1: *) case C of Jifne lbl :: C1 => genTest sign lbl C1 | Jifeq lbl :: C1 => genTest (not sign) lbl C1 | _ => let val (jumpend, C1) = makeJump C val (labtrue, C2) = addLabel (addCst 1 C1) in genTest sign labtrue (addCst 0 (addJump jumpend C2)) end end (* Generate code to access variable (int or array): *) and cAccess x env : Localvar.index = (case Env.lookup env x of (idx, _) => idx) (* Compile a micro-C program consisting only of the main function: *) fun cProgram args localvars (Prog topdecs) : instrs * Localvar.locals = let val _ = resetLabels () val _ = setLocals localvars val endcode = [intConst 10, Jinvokestatic printc, Jreturn] fun parsearg i [] = [] | parsearg i (idx1::idxr) = Jaload args :: intConst i :: Jaaload :: Jinvokestatic parseInt :: Jistore idx1 :: parsearg (i+1) idxr in case topdecs of [Fundec(NONE, "main", params, body)] => let val (paramidxs, envf) = bindparams params Env.empty val startcode = parsearg 0 paramidxs in (startcode @ cStmt body envf endcode, getLocals()) end | _ => raise Fail "A program can have only one function void main(...)" end; fun jvmcompile2file program (classname : string) = let open Classdecl Localvar Label Bytecode val classfile = Path.joinBaseExt {base=classname, ext=SOME "class"} val outclass = Jvmtype.class { pkgs = [], name = classname } val objectClass = Jvmtype.class {pkgs=["java", "lang"], name = "Object"} (* The default constructor outclass() { super(); } *) val init : method_decl = let val locals0 = freshLocals val (locals1, this) = nextVar1 locals0 val objectInit = {class = objectClass, name = "", msig = ([], NONE)} val code = [Jaload this, Jinvokespecial objectInit, Jreturn] in {flags = [], name = "", msig = ([], NONE), attrs = [CODE {attrs = [], stack = 3, locals = 1, code = code, hdls = []}] } end (* The method public static void main(String[] args) { ... } *) val main : method_decl = let val locals0 = freshLocals val (locals1, args) = nextVar1 locals0 val (code, locals2) = cProgram args locals1 program in {flags = [ACCpublic, ACCstatic], name = "main", msig = ([Jvmtype.Tarray (Jvmtype.Tclass stringClass)], NONE), attrs = [CODE {attrs = [], stack = Stackdepth.maxdepth code [], locals = Localvar.count locals2, code = code, hdls = []} ] } end (* The complete generated class *) val myClass = {flags = [], this = outclass, super = SOME objectClass, ifcs = [], fdecls = [], mdecls = [init, main], attrs = [] } : class_decl val cp = Constpool.create() val stream = BinIO.openOut classfile in (Classfile.emit (fn u => BinIO.output1(stream, u)) cp myClass) handle Fail s => print s; BinIO.closeOut stream end (* Example programs are found in the files ex1.c, ex2.c, etc, but only some of them can be compiled using jvmcompile2file compiler; see the top of this file. *)