(* A continuation-based (backwards) compiler from uC, a fraction of the C language, to an abstract machine. sestoft@dina.kvl.dk * 2001-03-29, 2002-03-18 To compile this compiler and use it: * compile the uC lexer and parser specifications * mosmlc -c -I .. Machine.sml * mosml -P full -I .. parse.sml contcomp.sml * contcompile2file (parsef "ex11.c") "out" * java Machine out 8 The abstract machine code is generated backwards, so that jumps to jumps can be eliminated, so that tail-calls (calls immediately followed by return) can be recognized, dead code can be eliminated, etc. The compilation of a block, which may contain a mixture of declarations and statements, proceeds in two passes: Pass 1: elaborate declarations to find the environment in which each statement must be compiled; also translate declarations into allocation instructions, of type bstmtordec. Pass 2: compile the statements in the given environments. *) app load ["Absyn", "Env", "Machine"]; open Absyn Env Machine; (* The intermediate representation between passes 1 and 2 above: *) datatype bstmtordec = BDec of instr list (* Declaration of local variable *) | BStmt of stmt (* A statement *) (* ------------------------------------------------------------------- *) (* Code-generating functions that perform local optimizations *) fun makeINCSP 0 C = C | makeINCSP n C = INCSP n :: C fun addINCSP m1 C : instr list = case C of INCSP m2 :: C1 => makeINCSP (m1+m2) C1 | RET m2 :: C1 => RET (m2-m1) :: C1 | Label lab :: RET m2 :: _ => RET (m2-m1) :: C | _ => makeINCSP m1 C fun addLabel C : label * instr list = case C of Label lab :: _ => (lab, C) | GOTO lab :: _ => (lab, C) | _ => let val lab = newLabel() in (lab, Label lab :: C) end fun makeJump C : instr * instr list = case C of Label lab :: RET m :: _ => (RET m, C) | RET m :: _ => (RET m, C) | Label lab :: _ => (GOTO lab, C) | GOTO lab :: _ => (GOTO lab, C) | _ => let val lab = newLabel() in (GOTO lab, Label lab :: C) end fun makeCall m lab C : instr list = case C of RET n :: C1 => TCALL(m, n, lab) :: C1 | Label _ :: RET n :: _ => TCALL(m, n, lab) :: C | _ => CALL(m, lab) :: C fun deadcode C = case C of [] => [] | Label lab :: _ => C | _ :: C1 => deadcode C1 fun addNOT C = case C of NOT :: C1 => C1 | IFZERO lab :: C1 => IFNZRO lab :: C1 | IFNZRO lab :: C1 => IFZERO lab :: C1 | _ => NOT :: C fun addJump jump C = (* jump is GOTO or RET *) let val C1 = deadcode C in case (jump, C1) of (GOTO lab1, Label lab2 :: _) => if lab1=lab2 then C1 else GOTO lab1 :: C1 | _ => jump :: C1 end; fun addGOTO lab C = addJump (GOTO lab) C fun addCST i C = case (i, C) of (0, EQ :: C1) => addNOT C1 | (0, ADD :: C1) => C1 | (0, SUB :: C1) => C1 | (0, NOT :: C1) => addCST 1 C1 | (_, NOT :: C1) => addCST 0 C1 | (1, MUL :: C1) => C1 | (1, DIV :: C1) => C1 | (_, INCSP m :: C1) => if m < 0 then addINCSP (m+1) C1 else CSTI i :: C | (0, IFZERO lab :: C1) => addGOTO lab C1 | (_, IFZERO lab :: C1) => C1 | (0, IFNZRO lab :: C1) => C1 | (_, IFNZRO lab :: C1) => addGOTO lab C1 | _ => CSTI i :: C (* ------------------------------------------------------------------- *) (* The environment keeps track of variable addresses or offsets *) datatype var = Glovar of int (* absolute address in stack *) | Locvar of int (* address relative to bottom of frame *) type venv = (string, var * typ) env * int (* compile-time environment *) type fenv = (string, label * typ option * paramdec list) env (* Bind declared variable in env and generate code to allocate it: *) fun allocate (kind : int -> var) (typ, x) (venv : venv) : venv * instr list = let val (env, fdepth) = venv in case typ of TypA (TypA _, _) => raise Fail "allocate: arrays of arrays not permitted" | TypA (t, SOME (Cst (CstI i))) => ((bind1 env (x, (kind (fdepth+i), typ)), fdepth+i+1), [INCSP i, GETSP, CSTI (i-1), SUB]) | TypA (t, e) => raise Fail "allocate: dynamic array size" | _ => ((bind1 env (x, (kind (fdepth), typ)), fdepth+1), [CSTI 0]) end (* Bind declared parameter in env: *) fun bindparam (typ, x) (env, fdepth) : venv = (bind1 env (x, (Locvar fdepth, typ)), fdepth+1); fun bindparams params (env, fdepth) : venv = List.foldl (fn (param, res) => bindparam param res) (env, fdepth) params; (* ------------------------------------------------------------------- *) (* Global environments for variables and functions *) local val glofenv = ref Env.empty : fenv ref in fun mkvenv (topdecs : topdec list) : venv * instr list = let fun addv [] env glosize = ((env, glosize), []) | addv (Vardec typx :: tdr) env glosize = let val ((env1, glosize1), code1) = allocate Glovar typx (env, glosize) val (envr, coder) = addv tdr env1 glosize1 in (envr, code1 @ coder) end | addv (Fundec _ :: tdr) env glosize = addv tdr env glosize in addv topdecs Env.empty 0 end; fun mkfenv (topdecs : topdec list) : unit = let fun addf [] fenv = fenv | addf (Fundec(tyOpt, f, xs, body) :: tdr) fenv = addf tdr (bind1 fenv (f, (newLabel(), tyOpt, xs))) | addf (Vardec _ :: tdr) fenv = addf tdr fenv in glofenv := addf topdecs Env.empty end; fun lookupglobalfun f = lookup (!glofenv) f end (* ------------------------------------------------------------------- *) (* Compiling uC statements *) fun cStmt stmt (env : venv) (C : instr list) : instr list = case stmt of If(e, stmt1, stmt2) => let val (jumpend, C1) = makeJump C val (labelse, C2) = addLabel (cStmt stmt2 env C1) in cExpr e env (IFZERO labelse :: cStmt stmt1 env (addJump jumpend C2)) end | While(e, body) => let val labbegin = newLabel() val (jumptest, C1) = makeJump (cExpr e env (IFNZRO labbegin :: C)) in addJump jumptest (Label labbegin :: cStmt body env C1) end | Expr e => cExpr e env (addINCSP ~1 C) | Block stmts => let fun pass1 [] (_, fdepth) = ([], fdepth) | pass1 (s1::sr) env = let val res1 as (_, env1) = bStmtordec s1 env val (resr, fdepthr) = pass1 sr env1 in (res1 :: resr, fdepthr) end val (stmtsback, fdepthend) = pass1 stmts env fun pass2 [] C = C | pass2 ((BDec code, env) :: sr) C = code @ pass2 sr C | pass2 ((BStmt stmt, env) :: sr) C = cStmt stmt env (pass2 sr C) in pass2 stmtsback (addINCSP(#2 env - fdepthend) C) end | Return NONE => RET (#2 env - 1) :: deadcode C | Return (SOME e) => cExpr e env (RET (#2 env) :: deadcode C) and bStmtordec (Stmt stmt) env : bstmtordec * venv = (BStmt stmt, env) | bStmtordec (Dec (typ, x)) env = let val (env1, code) = allocate Locvar (typ, x) env in (BDec code, env1) end (* Compiling uC expressions: * e is the expression to compile * env is the compile-time environment * fdepth is the depth of the current stack frame (activation record) * C is the code continuation; the code following this instruction Net effect principle: if the compilation (cExpr e env C) of expression e returns the instruction sequence instrs, then the execution of instrs will have the same effect as an instruction sequence that first computes the value of expression e on the stack top and then executes C, but because of optimizations instrs may actually achieve this in a different way. *) and cExpr (e : expr) (env : venv) (C : instr list) : instr list = case e of Access acc => cAccess acc env (LDI :: C) | Assign(acc, e) => cAccess acc env (cExpr e env (STI :: C)) | Cst (CstI i) => addCST i C | Cst CstN => addCST 0 C | Addr acc => cAccess acc env C | Prim1(ope, e1) => cExpr e1 env (case ope of "!" => addNOT C | "printi" => PRINTI :: C | "printc" => PRINTC :: C | _ => raise Fail "unknown primitive 1") | Prim2(ope, e1, e2) => cExpr e1 env (cExpr e2 env (case ope of "*" => MUL :: C | "+" => ADD :: C | "-" => SUB :: C | "/" => DIV :: C | "%" => MOD :: C | "==" => EQ :: C | "!=" => EQ :: addNOT C | "<" => LT :: C | ">=" => LT :: addNOT C | ">" => SWAP :: LT :: C | "<=" => SWAP :: LT :: addNOT C | _ => raise Fail "unknown primitive 2")) | Andalso(e1, e2) => (case C of IFZERO lab :: _ => cExpr e1 env (IFZERO lab :: cExpr e2 env C) | IFNZRO labthen :: C1 => let val (labelse, C2) = addLabel C1 in cExpr e1 env (IFZERO labelse :: cExpr e2 env (IFNZRO labthen :: C2)) end | _ => let val (jumpend, C1) = makeJump C val (labfalse, C2) = addLabel (addCST 0 C1) in cExpr e1 env (IFZERO labfalse :: cExpr e2 env (addJump jumpend C2)) end) | Orelse(e1, e2) => (case C of IFNZRO lab :: _ => cExpr e1 env (IFNZRO lab :: cExpr e2 env C) | IFZERO labthen :: C1 => let val (labelse, C2) = addLabel C1 in cExpr e1 env (IFNZRO labelse :: cExpr e2 env (IFZERO labthen :: C2)) end | _ => let val (jumpend, C1) = makeJump C val (labtrue, C2) = addLabel(addCST 1 C1) in cExpr e1 env (IFNZRO labtrue :: cExpr e2 env (addJump jumpend C2)) end) | Call(f, es) => callfun f es env C (* Generate code to access variable, dereference pointer or index array: *) and cAccess (AccVar x) env C = (case lookup (#1 env) x of (Glovar addr, _) => addCST addr C | (Locvar addr, _) => GETBP :: addCST addr (ADD :: C)) | cAccess (AccDeref e) env C = cExpr e env C | cAccess (AccIndex(acc, idx)) env C = cAccess acc env (LDI :: cExpr idx env (ADD :: C)) (* Generate code to evaluate expressions es: *) and cExprs [] env C = C | cExprs (e1::er) env C = cExpr e1 env (cExprs er env C) (* Generate code to evaluate arguments es and then call function f: *) and callfun f es env C : instr list = let val (labf, tyOpt, paramdecs) = lookupglobalfun f val argc = List.length es in if argc = List.length paramdecs then cExprs es env (makeCall argc labf C) else raise Fail (f ^ ": parameter/argument mismatch") end (* Compile complete uC programs; generate code to invoke the `main' function *) fun cProgram (Prog topdecs) : instr list * instr list = let val _ = resetLabels () val ((globalenv, globalsize), globalinit) = mkvenv topdecs val _ = mkfenv topdecs fun compilefun (tyOpt, f, xs, body) = let val (labf, _, params) = lookupglobalfun f val (envf, fdepthf) = bindparams params (globalenv, 0) val C0 = [RET (List.length params-1)] val code = cStmt body (envf, fdepthf) C0 in Label labf :: code end val functions = List.mapPartial (fn Fundec fundec => SOME (compilefun fundec) | Vardec _ => NONE) topdecs val (mainlab, _, mainparams) = lookupglobalfun "main" val argc = List.length mainparams in (globalinit @ [STOP], [CALL(argc, mainlab), STOP] @ List.concat functions) end; fun contcompile2file program fname = let val (globalinit, functions) = cProgram program val initcode = Machine.code2ints 1 globalinit val funcstart = length initcode + 1 val funccode = Machine.code2ints funcstart functions in intstofile (funcstart :: initcode @ funccode) fname; functions end; (* Example programs are found in the files ex1.c, ex2.c, etc *)