Un exemple de compilation en détail

On va voir toutes les etapes de la compilation sur simplest.tg qui contient le programme CTigre suivant

let i:= 3 in
printint(i+2)

Première phase, on lance ocaml dans votre répértoire de travail, et on charge toplevel.ml.

Notez comment cela vous rend accessibles un certain nombre de fonctions (il est utile de voir leur type aussi!), qui vous permettent de manipuler les différentes phases de transformation et traduction du code.

        Objective Caml version 3.09+dev0 (2004-07-13)

# #use "toplevel.ml";;
val predefuns : int Absyntax.Symbol.table = <abstr>
val predeframes : Frame.frame Absyntax.Symbol.table = <abstr>
val level_main : int = 1
val mainwrapper :
  Absyntax.Absyn.exp -> Absyntax.Location.t -> Absyntax.Absyn.exp = <fun>
val ast : string -> Absyntax.Absyn.exp = <fun>
val attrast : Absyntax.Absyn.exp -> Absyntax.Absyn.exp = <fun>
val ir : Absyntax.Absyn.exp -> Frame.frag list = <fun>
val stms :
  Frame.frag list ->
  Frame.frag list * (Tree.Tree.stm list * Frame.frame) list = <fun>
val simplealloc :
  (Assem.temp * int) list -> Assem.instr list -> Assem.instr list = <fun>
val code_for_fragment :
  ((Assem.temp * int) list -> Assem.instr list -> Assem.instr list) ->
  Tree.Tree.stm list * Frame.frame -> Assem.instr list list = <fun>
val instrs :
  ((Assem.temp * int) list -> Assem.instr list -> Assem.instr list) ->
  'a * (Tree.Tree.stm list * Frame.frame) list -> 'a * Assem.instr list list =
  <fun>
val print_assem : Frame.frag list * Assem.instr list list -> unit = <fun>

On commence par parser le fichier source, ce qui va nous rendre un arbre de syntaxe abstraite, que nous gardons dans a:

# let a = ast "simplest.tg";;
val a : Absyntax.Absyn.exp =
  {exp_desc =
    LetExp
     ([FunDec
        [{fun_name = "main"; params = []; result = Some "int";
          f_level = None;
          body =
           {exp_desc =
             SeqExp
              ({exp_desc =
                 LetExp
                  ([VarDec
                     [{v_name = "i"; var_typ = None; var_level = None;
                       var_offset = None;
                       init = {exp_desc = IntExp 3; exp_loc = (8, 9)};
                       var_pos = (4, 9)}]],
                  {exp_desc =
                    Apply ({nom = "printint"; lev = None},
                     [{exp_desc =
                        Opexp (PlusOp,
                         {exp_desc =
                           VarExp
                            (SimpleVar
                              {vid = "i"; v_level = None; v_offset = None});
                          exp_loc = (22, 23)},
                         {exp_desc = IntExp 2; exp_loc = (24, 25)});
                       exp_loc = (22, 25)}]);
                   exp_loc = (13, 26)});
                exp_loc = (0, 26)},
              {exp_desc = IntExp 0; exp_loc = (-1, -1)});
            exp_loc = (-1, -1)};
          num_vars = None; fun_loc = (0, 26)}]],
     {exp_desc = Apply ({nom = "main"; lev = None}, []); exp_loc = (-1, -1)});
   exp_loc = (0, 26)}

Ensuite, on visite l'arbre pour renseigner les champs level et offset; nous gardons le resulta dans a':

# let a' = attrast a;;
val a' : Absyntax.Absyn.exp =
  {exp_desc =
    LetExp
     ([FunDec
        [{fun_name = "main"; params = []; result = Some "int";
          f_level = Some 1;
          body =
           {exp_desc =
             SeqExp
              ({exp_desc =
                 LetExp
                  ([VarDec
                     [{v_name = "i"; var_typ = None; var_level = Some 1;
                       var_offset = Some 1;
                       init = {exp_desc = IntExp 3; exp_loc = (8, 9)};
                       var_pos = (4, 9)}]],
                  {exp_desc =
                    Apply ({nom = "printint"; lev = Some 1},
                     [{exp_desc =
                        Opexp (PlusOp,
                         {exp_desc =
                           VarExp
                            (SimpleVar
                              {vid = "i"; v_level = Some 1;
                               v_offset = Some 1});
                          exp_loc = (22, 23)},
                         {exp_desc = IntExp 2; exp_loc = (24, 25)});
                       exp_loc = (22, 25)}]);
                   exp_loc = (13, 26)});
                exp_loc = (0, 26)},
              {exp_desc = IntExp 0; exp_loc = (-1, -1)});
            exp_loc = (-1, -1)};
          num_vars = Some 1; fun_loc = (0, 26)}]],
     {exp_desc = Apply ({nom = "main"; lev = Some 1}, []);
      exp_loc = (-1, -1)});
   exp_loc = (0, 26)}

Maintenant, il faut traduire a' en code intermediaire, et pour cela nous appellons la fonction ir

# let intr = ir a';;
val intr : Frame.frag list =
  [Frame.PROC
    (MOVE (TEMP 3,
      ESEQ
       (EXP
         (ESEQ (MOVE (MEM (BINOP (MINUS, TEMP 30, CONST 4)), CONST 3),
           CALL (NAME "printint",
            [MEM (TEMP 30);
             BINOP (PLUS, MEM (BINOP (MINUS, TEMP 30, CONST 4)), CONST 2)]))),
       CONST 0)),
    {Frame.entry = "main"; Frame.maxargs = 0; Frame.numtemps = 1})]

Notez comment le code ainsi produit n'est absolument pas en forme canonique (par exemple, il y a deux ESEQ, et l'appel a printint n'est pas sous un MOVE).

On va donc appeler sur ce code les phases de linéarisations, decoupage en blocs et construction des traces vues en cours et pour lesquelles on vous donne le code (il vous reste a écrire la fonction stms qui combine ces trois phases):

# let s = stms intr;;
val s : Frame.frag list * (Tree.Tree.stm list * Frame.frame) list =
  ([],
   [([LABEL "L1__block_entry";
      MOVE (MEM (BINOP (MINUS, TEMP 30, CONST 4)), CONST 3);
      MOVE (TEMP 102,
       CALL (NAME "printint",
        [MEM (TEMP 30);
         BINOP (PLUS, MEM (BINOP (MINUS, TEMP 30, CONST 4)), CONST 2)]));
      EXP (TEMP 102); MOVE (TEMP 3, CONST 0)],
     {Frame.entry = "main"; Frame.maxargs = 0; Frame.numtemps = 1})])

Maintenant, nous sommes prêts à produire le code assembleur: sur chaque fragment de type PROC, il faudra faire de la séléction d'instructions.

Pour simplifier ce qui suit, nous allons isoler le seul fragment qui nous interesse dans cet exemple (notez que votre code général devra traiter une LISTE de fragments).

# let ([],[(stml,frame)]) = s;;
Characters 4-23:
Warning: this pattern-matching is not exhaustive.
Here is an example of a value that is not matched:
(_::_, _)
  let ([],[(stml,frame)]) = s;;
      ^^^^^^^^^^^^^^^^^^^
val stml : Tree.Tree.stm list =
  [LABEL "L1__block_entry";
   MOVE (MEM (BINOP (MINUS, TEMP 30, CONST 4)), CONST 3);
   MOVE (TEMP 102,
    CALL (NAME "printint",
     [MEM (TEMP 30);
      BINOP (PLUS, MEM (BINOP (MINUS, TEMP 30, CONST 4)), CONST 2)]));
   EXP (TEMP 102); MOVE (TEMP 3, CONST 0)]
val frame : Frame.frame =
  {Frame.entry = "main"; Frame.maxargs = 0; Frame.numtemps = 1}

Avec le filtrage de motif précédent, nous avons maintenant obtenu dans stml et frame les composants du fragment qui nous interesse.

Il est venu le moment de calculer maxargs pour ce bloc, ce qui peut se faire en visitant les instructions dans stml et en calculant le max de la taille des listes des arguments des instructions CALL. Une fois cela fait, il faut mettre à jour le champs maxargs de frame.

Tout ceci, dans mon compilateur, est fait dans une simple fonction adjust_frame que j'ai rangé dans le module Frame.

# Frame.adjust_frame (stml,frame);;
- : unit = ()
# frame;;
- : Frame.frame =
{Frame.entry = "main"; Frame.maxargs = 2; Frame.numtemps = 1}

Maintenant, pour chaque arbre de code intermedaire dans la liste, il faut faire la séléction d'instruction.

# let abs_code = List.map Munch.munchstm stml;;
val abs_code : Assem.instr list list =
  [[Assem.LABEL {l_assem = "L1__block_entry:\n"; lab = "L1__block_entry"}];
   [OPER {o_assem = "addi `d0 $0 3"; o_dst = [103]; o_src = []; jump = None};
    OPER
     {o_assem = "sw  `s1 -4(`s0)"; o_dst = []; o_src = [30; 103];
      jump = None}];
   [OPER
     {o_assem = "lw `d0 0(`s0)"; o_dst = [104]; o_src = [30]; jump = None};
    OPER
     {o_assem = "sw `s0 0(`s1)"; o_dst = []; o_src = [104; 29]; jump = None};
    OPER
     {o_assem = "lw `d0 -4(`s0)"; o_dst = [106]; o_src = [30]; jump = None};
    OPER
     {o_assem = "addi `d0 `s0 2"; o_dst = [105]; o_src = [106]; jump = None};
    OPER
     {o_assem = "sw `s0 4(`s1)"; o_dst = []; o_src = [105; 29]; jump = None};
    OPER
     {o_assem = "jal printint"; o_dst = [31; 2; 3; 8; 9; 4; 2; 3];
      o_src = [30; 28; 29]; jump = None};
    OPER
     {o_assem = "add `d0 `s0 $0 "; o_dst = [102]; o_src = [3]; jump = None}];
   [];
   [OPER {o_assem = "addi `d0 $0 0"; o_dst = [107]; o_src = []; jump = None};
    OPER
     {o_assem = "add `d0 `s0 $0"; o_dst = [3]; o_src = [107]; jump = None}]]

Cela produit une liste de listes, qu'il convient d'aplatir...

# let flat_abs_code = List.flatten abs_code;;
val flat_abs_code : Assem.instr list =
  [Assem.LABEL {l_assem = "L1__block_entry:\n"; lab = "L1__block_entry"};
   OPER {o_assem = "addi `d0 $0 3"; o_dst = [103]; o_src = []; jump = None};
   OPER
    {o_assem = "sw  `s1 -4(`s0)"; o_dst = []; o_src = [30; 103]; jump = None};
   OPER {o_assem = "lw `d0 0(`s0)"; o_dst = [104]; o_src = [30]; jump = None};
   OPER
    {o_assem = "sw `s0 0(`s1)"; o_dst = []; o_src = [104; 29]; jump = None};
   OPER
    {o_assem = "lw `d0 -4(`s0)"; o_dst = [106]; o_src = [30]; jump = None};
   OPER
    {o_assem = "addi `d0 `s0 2"; o_dst = [105]; o_src = [106]; jump = None};
   OPER
    {o_assem = "sw `s0 4(`s1)"; o_dst = []; o_src = [105; 29]; jump = None};
   OPER
    {o_assem = "jal printint"; o_dst = [31; 2; 3; 8; 9; 4; 2; 3];
     o_src = [30; 28; 29]; jump = None};
   OPER
    {o_assem = "add `d0 `s0 $0 "; o_dst = [102]; o_src = [3]; jump = None};
   OPER {o_assem = "addi `d0 $0 0"; o_dst = [107]; o_src = []; jump = None};
   OPER {o_assem = "add `d0 `s0 $0"; o_dst = [3]; o_src = [107]; jump = None}]

Maintenant, nous parcourons la liste des instructions abstraites pour répérer les temporaires vraiment utilisés dans notre code assembleur:

#  let ut = Assem.used_temps Mips.truereg flat_abs_code;;
val ut : Assem.temp list = [107; 106; 105; 104; 103; 102]

Et nous utilisons cette liste (ATTENTION: chaque temporaire doit apparaitre dans la liste UNE SEULE FOIS, sinon le code qui suit est faux!) pour associer une position dans le bloc d'activation pour chaque temporaire.

# let tempsmapping = 
    List.map (fun t -> t,Frame.alloc_local frame) ut;;
val tempsmapping : (Assem.temp * int) list =
  [(102, 8); (103, 12); (104, 16); (105, 20); (106, 24); (107, 28)]

Notez comment cette operation a modifié le frame, vu que chaque appel a alloc_local increment la valeur de frame.numtemps.

# frame;;
- : Frame.frame =
{Frame.entry = "main"; Frame.maxargs = 2; Frame.numtemps = 7}

On a presque fini: on construit le prologue et l'épilogue

# let prol=Mips.emit_prologue frame;;
prol : Assem.instr list =
[COMMENT
  "\n# start prologue of main\n\nmain_framesize=48\nmain_frameoffset=32\n";
 Assem.LABEL {l_assem = "main:"; lab = "main"};
 OPER
  {o_assem = "sub `d0 `s0 main_framesize"; o_dst = [29]; o_src = [29];
   jump = None};
 OPER
  {o_assem = "sw `s0 main_framesize-main_frameoffset(`s1)"; o_dst = [];
   o_src = [30; 29]; jump = None};
 OPER
  {o_assem = "sw `s0 (main_framesize-main_frameoffset)-4(`s1)"; o_dst = [];
   o_src = [31; 29]; jump = None};
 OPER
  {o_assem = "add `d0 `s0 main_framesize"; o_dst = [30]; o_src = [29];
   jump = None};
 COMMENT "#end prologue of main\n\n"]

# let epil=Mips.emit_epilogue frame;;
epil : Assem.instr list = ...

Et on expanse chaque instruction assembleur en une, deux, trois ou quatre instruction selon le besoin (il faut remplacer chaque temporaire source par une instruction de chargement dans $10 et eventuellemnt $11, et le temporaire destination par une écriture de $11 dans le bloc d'activation, à l'offset indiqué par tempsmapping).

Tout ceci est fait par la fonction simplealloc:

# let codeducorps = List.flatten (List.map (fun s -> simplealloc tempsmapping s) abs_code);;
val codeducorps : Assem.instr list =
  [Assem.LABEL {l_assem = "L1__block_entry:\n"; lab = "L1__block_entry"};
   COMMENT "\n# start of addi t103 $0 3";
   OPER {o_assem = "addi `d0 $0 3"; o_dst = [10]; o_src = []; jump = None};
   OPER
    {o_assem = "sw   `s0, -12(`s1)"; o_dst = []; o_src = [10; 30];
     jump = None};
   COMMENT "\n# start of sw  t103 -4(t30)";
   OPER
    {o_assem = "lw   `d0, -12(`s0)"; o_dst = [11]; o_src = [30]; jump = None};
   OPER
    {o_assem = "sw  `s1 -4(`s0)"; o_dst = []; o_src = [30; 11]; jump = None};
   COMMENT "\n# start of lw t104 0(t30)";
   OPER {o_assem = "lw `d0 0(`s0)"; o_dst = [10]; o_src = [30]; jump = None};
   OPER
    {o_assem = "sw   `s0, -16(`s1)"; o_dst = []; o_src = [10; 30];
     jump = None};
   COMMENT "\n# start of sw t104 0(t29)";
   OPER
    {o_assem = "lw   `d0, -16(`s0)"; o_dst = [10]; o_src = [30]; jump = None};
   OPER
    {o_assem = "sw `s0 0(`s1)"; o_dst = []; o_src = [10; 29]; jump = None};
   COMMENT "\n# start of lw t106 -4(t30)";
   OPER {o_assem = "lw `d0 -4(`s0)"; o_dst = [10]; o_src = [30]; jump = None};
   OPER
    {o_assem = "sw   `s0, -24(`s1)"; o_dst = []; o_src = [10; 30];
     jump = None};
   COMMENT "\n# start of addi t105 t106 2";
   OPER
    {o_assem = "lw   `d0, -24(`s0)"; o_dst = [10]; o_src = [30]; jump = None};
   OPER {o_assem = "addi `d0 `s0 2"; o_dst = [10]; o_src = [10]; jump = None};
   OPER
    {o_assem = "sw   `s0, -20(`s1)"; o_dst = []; o_src = [10; 30];
     jump = None};
   COMMENT "\n# start of sw t105 4(t29)";
   OPER
    {o_assem = "lw   `d0, -20(`s0)"; o_dst = [10]; o_src = [30]; jump = None};
   OPER
    {o_assem = "sw `s0 4(`s1)"; o_dst = []; o_src = [10; 29]; jump = None};
   COMMENT "\n# start of jal printint";
   OPER
    {o_assem = "jal printint"; o_dst = [31; 2; 3; 8; 9; 4; 2; 3];
     o_src = [30; 28; 29]; jump = None};
   COMMENT "\n# start of add t102 t3 $0 ";
   OPER {o_assem = "add `d0 `s0 $0 "; o_dst = [10]; o_src = [3]; jump = None};
   OPER
    {o_assem = "sw   `s0, -8(`s1)"; o_dst = []; o_src = [10; 30];
     jump = None};
   COMMENT "\n# start of addi t107 $0 0";
   OPER {o_assem = "addi `d0 $0 0"; o_dst = [10]; o_src = []; jump = None};
   OPER
    {o_assem = "sw   `s0, -28(`s1)"; o_dst = []; o_src = [10; 30];
     jump = None};
   COMMENT "\n# start of add t3 t107 $0";
   OPER
    {o_assem = "lw   `d0, -28(`s0)"; o_dst = [10]; o_src = [30]; jump = None};
   OPER {o_assem = "add `d0 `s0 $0"; o_dst = [3]; o_src = [10]; jump = None}]

On construit le code pour le fragment en concatenant le prologue, le corps et l'épilogue:

# let code = prol@codeducorps@epil;;

Et enfin, il ne nous reste plus qu'à imprimer le code assembleur complet ...

#   print_assem ([],[code]);; 


# Assembler avec allocation naive:

    .data

    .text
L1__block_entry:


# start of addi t103 $0 3
addi $t2 $0 3
sw   $t2, -12($fp)

# start of sw  t103 -4(t30)
lw   $t3, -12($fp)
sw  $t3 -4($fp)

# start of lw t104 0(t30)
lw $t2 0($fp)
sw   $t2, -16($fp)

# start of sw t104 0(t29)
lw   $t2, -16($fp)
sw $t2 0($sp)

# start of lw t106 -4(t30)
lw $t2 -4($fp)
sw   $t2, -24($fp)

# start of addi t105 t106 2
lw   $t2, -24($fp)
addi $t2 $t2 2
sw   $t2, -20($fp)

# start of sw t105 4(t29)
lw   $t2, -20($fp)
sw $t2 4($sp)

# start of jal printint
jal printint

# start of add t102 t3 $0 
add $t2 $v1 $0 
sw   $t2, -8($fp)

# start of addi t107 $0 0
addi $t2 $0 0
sw   $t2, -28($fp)

# start of add t3 t107 $0
lw   $t2, -28($fp)
add $v1 $t2 $0
- : unit = ()

Il suffit maintenant de concatener runtime.s à ce code, et lancer spim.