|;
+
+ # Tables
+ s|\\begin{tabular}{($PARAM)}|&begintab($1,"")|oge;
+ s|\\end{tabular}|&endtab()|oge;
+
+ s|\\begin{Table}{($PARAM)}{($PARAM)}|&begintab($1,$2)|oge;
+ s|\\end{Table}|&endtab()|oge;
+
+ s|\&|&addtab()|oge if $MODE eq "tabular";
+ s|\\\\|&tabnewline()|oge if $MODE eq "tabular";
+ s|\\\\|&newline()|oge if $MODE eq "latex";
+
+ # SML mode handling
+ s|\\sml{($PARAM)}|&sml($1)|oge;
+ s|\\code{($PARAM)}|\1|g;
+ if ($MODE eq "sml") { $_ = smldisplay($_); }
+
+ # Math mode handling
+ if ($MODE ne "sml") { s|\$($MATHPARAM)\$|&math($1)|eg; }
+ s|\\\(($MATHPARAM2)\\\)|&math($1)|oeg;
+ if (/\\\[\s*\S|\S\s*\\\[/)
+ { error("please put \\[ on a separate line by itself"); }
+ if (/^\s*\S+\s*\\\]\s*$/)
+ { error("please put \\] on a separate line by itself"); }
+ if ($MODE eq "displaymath") { $_ = math($_); }
+ if ($MODE eq "eqnarray*") { $_ = math($_); }
+ if ($MODE eq "eqnarray") { $_ = math($_); }
+
+ if (s|\\\[\s*$|
|) { enterMode("displaymath"); }
+ if (s|\\\]|
|) { leaveMode("displaymath"); }
+ if (s|\\begin{eqnarray\*}|
|) { enterMode("eqnarray*"); }
+ if (s|\\end{eqnarray\*}|
|) { leaveMode("eqnarray*"); }
+ if (s|\\begin{eqnarray}|
|) { enterMode("eqnarray"); }
+ if (s|\\end{eqnarray}|
|) { leaveMode("eqnarray"); }
+
+ # How to handle environment
+ s|~?\\begin{($ENV0)}|&beginenv($1)|oge;
+ s|~?\\begin{($ENV1)}{($PARAM)}|&beginenv($1,$2)|oge;
+ s|~?\\begin{($ENV2)}{($PARAM)}{($PARAM)}|&beginenv($1,$2,$3)|oge;
+ s|~?\\end{($PARAM)}|&endenv($1)|oge;
+
+ # How to handle commands
+ s/~?\\($COMMAND0)(\{\}|\b)/&command($1)/oge;
+ s|~?\\($COMMAND1){($PARAM)}|&command($1,$2)|oge;
+ s|~?\\($COMMAND2){($PARAM)}{($PARAM)}|&command($1,$2,$3)|oge;
+ s|~?\\($COMMAND3){($PARAM)}{($PARAM)}{($PARAM)}|&command($1,$2,$3,$4)|oge;
+
+ # Math mode stuff
+ s|\\_|_|g;
+ s|\\{|{|g;
+ s|\\}|}|g;
+
+ # Indentation
+ s|\\noindent ||g;
+
+ #
+ # Not all browers can handle the math stuff yet. Do this in the mean time.
+ #
+ stupidBrowserMath();
+
+ push @TEXT, $_;
+ }
+ close INFILE;
+ return ($PAGE_TITLE, @TEXT);
+
+}
+
+############################################################################
+# processDocument($filename) -- main loop for processing one document.
+# Reads the file line by line, rewrites environments and commands through
+# beginenv/endenv/command, then writes every section and the main page.
+############################################################################
+sub processDocument
+{ my($filename) = @_;
+ # Reset per-document globals; parsing starts in "latex" mode.
+
+ globalInit();
+
+ $MODE = "latex";
+ @MODE_STACK=($MODE);
+
+ $PATHNAME = ""; # directory part of $filename, including the trailing "/"
+ if ($filename =~ /^(.*\/)[^\/]*$/) { $PATHNAME = $1; }
+
+ $LINE_NO=0;
+ $FILENAME=$filename;
+
+ open (DOCUMENT, '<', $filename) || die("$! $filename");
+ while ($_ = <DOCUMENT>) # one line per pass; Perl adds the defined() test
+ { $LINE_NO++;
+ s|%.*$||; #skip comments
+
+ s|\\\\|&newline()|oge if $MODE eq "latex";
+
+ # How to handle environment
+ s|~?\\begin{($ENV0)}|&beginenv($1)|oge;
+ s|~?\\begin{($ENV1)}{($PARAM)}|&beginenv($1,$2)|oge;
+ s|~?\\begin{($ENV2)}{($PARAM)}{($PARAM)}|&beginenv($1,$2,$3)|oge;
+ s|~?\\end{($PARAM)}|&endenv($1)|oge;
+
+ # How to handle commands
+ s/~?\\($COMMAND0)(\{\}|\b)/&command($1)/oge;
+ s|~?\\($COMMAND1){($PARAM)}|&command($1,$2)|oge;
+ s|~?\\($COMMAND2){($PARAM)}{($PARAM)}|&command($1,$2,$3)|oge;
+ s|~?\\($COMMAND3){($PARAM)}{($PARAM)}{($PARAM)}|&command($1,$2,$3,$4)|oge;
+
+ push @TEXT, $_;
+ }
+ close DOCUMENT;
+
+ # Write out the sections
+ foreach $file (keys %SECTION_TEXT)
+ { my($output) = $SECTION_TEXT{$file};
+ my($title, $localtoc, @text) = @{$output};
+ writeSection($file, $title, $localtoc, @text);
+ }
+
+ # Write out the main page
+ writeDocument($filename);
+}
+
+#main: run processDocument on every file named on the command line
+foreach $file (@ARGV)
+{ processDocument($file)
+}
+
+
diff --git a/MLRISC/Doc/latex/C6.tex b/MLRISC/Doc/latex/C6.tex
new file mode 100644
index 0000000..66d7790
--- /dev/null
+++ b/MLRISC/Doc/latex/C6.tex
@@ -0,0 +1,3 @@
+\section{The TI C6x Back End}
+
+No documentation yet.
diff --git a/MLRISC/Doc/latex/ILP.tex b/MLRISC/Doc/latex/ILP.tex
new file mode 100644
index 0000000..c860e4a
--- /dev/null
+++ b/MLRISC/Doc/latex/ILP.tex
@@ -0,0 +1,23 @@
+\section{ILP Optimizations}
+\subsection{Introduction}
+ This section is under construction. A new scheduler framework
+for superscalars that ties into the machine description language
+is currently being developed.
+\subsection{The ILP ToolBox}
+\subsubsection{List Scheduler}
+\subsubsection{Ranking Algorithms}
+ Some more complex ranking algorithms (than say critical path) have been
+implemented. These are:
+\begin{itemize}
+ \item The algorithm of
+ \mlrischref{scheduling/PalemSimons.sig}{Palem and Simons}
+ which appeared in TOPLAS '93. This algorithm
+ computes the modified deadlines of a set of instructions, with
+ precedence, latency, and deadline constraints.
+
+ \item The algorithm of
+ \mlrischref{scheduling/LeungPalemPnueli.sig}{Leung, Palem, and Pnueli}
+ which appeared in PACT '98.
+ This algorithm computes the modified deadlines of a set of instructions,
+ with precedence, latency, release-times and deadline constraints.
+\end{itemize}
diff --git a/MLRISC/Doc/latex/INTRO.tex b/MLRISC/Doc/latex/INTRO.tex
new file mode 100644
index 0000000..406ff4d
--- /dev/null
+++ b/MLRISC/Doc/latex/INTRO.tex
@@ -0,0 +1,48 @@
+\section{MLRISC}
+ \begin{center}
+ \begin{Bold}
+ A framework for retargetable and optimizing compiler back ends
+ \end{Bold}
+ \end{center}
+\begin{center}
+ \begin{tabular}{cc}
+ \begin{address}
+ \href{mailto:george@research.bell-labs.com}{Lal George}
+ \end{address} &
+ \begin{address}
+ \href{mailto:leunga@cs.nyu.edu}{ Allen Leung}
+ \end{address} \\
+ Bell Labs & New York University \\
+ \end{tabular}
+\end{center}
+
+\begin{center}
+\image{MLRISC logo}{pictures/png/uncol.png}{align="middle"}
+
+\begin{Italics}
+ \href{contributors.html}{Contributors}
+\end{Italics}
+\end{center}
+
+Writing native code generators for modern processors is a significant
+investment. Unfortunately it is difficult
+to reuse this investment for other architectures, and even more
+difficult to reuse for other source language compilers. MLRISC is
+a customizable optimizing back-end written in
+\externhref{http://cm.bell-labs.com/cm/cs/what/smlnj/sml.html}{Standard ML}
+and has been successfully retargeted to multiple architectures.
+MLRISC deals elegantly with the special requirements imposed by the
+execution model of different high-level, typed languages, by allowing
+many components of the system to be customized to fit the source language
+semantics and runtime system requirements.
+
+The \begin{color}{#aa0000}Overview\end{color} pages on the left provide
+an introduction to the MLRISC system, mostly from the client's perspective,
+while the \begin{color}{#aa0000}System\end{color}
+pages give a more detailed look at the
+innards, and are of interest to MLRISC hackers. As usual, development of
+the system has outpaced the documentation process substantially; thus
+the latter part of the document is incomplete but it may still be useful.
+
+These pages are also available in
+\href{../latex/mlrisc.ps}{tech report} form.
diff --git a/MLRISC/Doc/latex/Makefile b/MLRISC/Doc/latex/Makefile
new file mode 100644
index 0000000..9e3246e
--- /dev/null
+++ b/MLRISC/Doc/latex/Makefile
@@ -0,0 +1,35 @@
+# This file compiles the documentation (PDFs are built with latexmk).
+# all, pictures, pdf and clean name actions, not files, hence .PHONY.
+.PHONY: all pictures pdf clean
+
+TEX= sml.tex mlrisc.tex mltexdoc.tex
+PDF= $(TEX:.tex=.pdf)
+
+all: pictures $(PDF)
+
+pictures:
+	$(MAKE) -C ../pictures
+
+pdf: $(PDF)
+
+%.pdf: %.tex
+	latexmk -bibtex -ps- -pdf $<
+
+mlrisc.pdf: annotations.tex graphics.tex instructions.tex mltree.tex \
+ delayslots.tex cells.tex cluster.tex constants.tex \
+ pseudo-ops.tex streams.tex labelexp.tex labels.tex regions.tex \
+ regmap.tex graphs.tex mlrisc-ir.tex compiler-graphs.tex \
+ SSA.tex VLIW.tex ra.tex ILP.tex mlrisc-arch.tex \
+ future-work.tex asm.tex mc.tex instrsel.tex \
+ availability.tex gc.tex mlrisc-gen.tex contributions.tex \
+ systems.tex mlrisc-graphics.tex contributors.tex \
+ requirements.tex INTRO.tex problem.tex mlrisc-compiler.tex \
+ mlrisc-ir-rep.tex backend-opt.tex sys-integration.tex \
+ optimizations.tex span-dep.tex mlrisc-md.tex line-counts.tex \
+ sparc.tex alpha.tex x86.tex ppc.tex mips.tex hppa.tex C6.tex \
+ mltree-ext.tex mltree-util.tex
+
+mltexdoc.pdf: mltex.tex
+
+clean:
+	rm -f $(PDF) *.aux *.log *.bbl *.blg
diff --git a/MLRISC/Doc/latex/SSA.tex b/MLRISC/Doc/latex/SSA.tex
new file mode 100644
index 0000000..f181219
--- /dev/null
+++ b/MLRISC/Doc/latex/SSA.tex
@@ -0,0 +1,21 @@
+\section{SSA Optimizations}\label{sec:ssa}
+
+All SSA optimization modules satisfy the signature
+\mlrischref{SSA/ssa-optimization.sig}{SSA\_OPTIMIZATION},
+which is defined as:
+\begin{SML}
+signature SSA_OPTIMIZATION = sig
+ structure SSA : SSA
+
+ val optimize : SSA.ssa -> SSA.ssa
+end
+\end{SML}
+
+The following SSA based scalar optimizations have been implemented in MLRISC.
+\begin{itemize}
+\item \mlrischref{SSA/ssa-dead-code-elim.sml}{Dead code elimination}
+\item \mlrischref{SSA/ssa-gvn.sml}{Global value numbering, constant folding, algebraic simplification}
+\item \mlrischref{SSA/ssa-gcm.sml}{Global code motion}
+\item \mlrischref{SSA/ssa-cond-const-prop.sml}{Conditional constant propagation}
+\item \mlrischref{SSA/ssa-op-str-red.sml}{Strength reduction}
+\end{itemize}
diff --git a/MLRISC/Doc/latex/VLIW.tex b/MLRISC/Doc/latex/VLIW.tex
new file mode 100644
index 0000000..36761c8
--- /dev/null
+++ b/MLRISC/Doc/latex/VLIW.tex
@@ -0,0 +1,21 @@
+\section{Optimizations for VLIW/EPIC Architectures}
+
+\subsection{Overview}
+Many newer architectures such as the upcoming IA-64 and the
+DSPs such as the C6 are VLIW or so called EPIC machines.
+These architectures depend on the compiler to
+extract instruction level parallelism (\newdef{ILP})
+and data level parallelism (\newdef{DLP}).
+
+Optimizations for these architectures include:
+\begin{itemize}
+ \item Hyperblock construction
+ \item Predication and predicate analysis
+ \item Hyperblock scheduling
+ \item Modulo scheduling
+\end{itemize}
+
+\subsection{Hyperblocks}
+\subsection{Predicate Analysis}
+\subsection{Hyperblock Scheduling}
+\subsection{Modulo Scheduling}
diff --git a/MLRISC/Doc/latex/alpha.tex b/MLRISC/Doc/latex/alpha.tex
new file mode 100644
index 0000000..48a54ea
--- /dev/null
+++ b/MLRISC/Doc/latex/alpha.tex
@@ -0,0 +1,97 @@
+\section{The Alpha Back End}
+
+\subsection{Trap Shadows, Floating Exceptions, and Denormalized Numbers on the DEC Alpha}
+
+ \emph{By Andrew W. Appel and Lal George, Nov 28, 1995}
+
+ See section 4.7.5.1 of the \emph{Alpha Architecture Reference Manual}.
+
+ The Alpha has imprecise exceptions, meaning that if a floating
+ point instruction raises an IEEE exception, the exception may
+ not interrupt the processor until several successive instructions have
+ completed. ML, on the other hand, may want a ``precise'' model
+ of floating point exceptions.
+
+ Furthermore, the Alpha hardware does not support denormalized numbers
+ (for ``gradual underflow''). Instead, underflow always rounds to zero.
+ However, each floating operation (add, mult, etc.) has a trapping
+ variant that will raise an exception (imprecisely, of course) on
+ underflow; in that case, the instruction will produce a zero result
+ AND an exception will occur. In fact, there are several variants
+ of each instruction; three variants of MULT are:
+\begin{description}
+ \item[MULT s1,s2,d] truncate denormalized result to zero; no exception
+ \item[MULT/U s1,s2,d] truncate denormalized result to zero; raise UNDERFLOW
+ \item[MULT/SU s1,s2,d] software completion, producing denormalized result
+\end{description}
+
+ The hardware treats the \verb|MULT/U| and \verb|MULT/SU|
+ instructions identically,
+ truncating a denormalized result to zero and raising the UNDERFLOW
+ exception. But the operating system, on an UNDERFLOW exception,
+ examines the faulting instruction to see if it's an \verb|/SU|
+ form, and if so,
+ recalculates \verb|s1*s2|, puts the right answer in \verb|d|, and continues,
+ all without invoking the user's signal handler.
+
+ Because most machines compute with denormalized numbers in hardware,
+ to maximize portability of SML programs, we use the \verb|MULT/SU| form.
+ (and \verb|ADD/SU|, \verb|SUB/SU|, etc.) But to use this form successfully,
+ certain rules have to be followed. Basically, d cannot be the same
+ register as s1 or s2, because the opsys needs to be able to
+ recalculate the operation using the original contents of s1 and s2,
+ and the MULT/SU instruction will overwrite d even if it traps.
+
+ More generally, we may want to have a sequence of floating-point
+ instructions. The rules for such a sequence are:
+
+ 1. The sequence should end with a \verb|TRAPB| (trap barrier) instruction.
+ (This could be relaxed somewhat, but certainly a \verb|TRAPB| would
+ be a good idea sometime before the next branch instruction or
+ update of an ML reference variable, or any other ML side effect.)
+ 2. No instruction in the sequence should destroy any operand of itself
+ or of any previous instruction in the sequence.
+ 3. No two instructions in the sequence should write the same destination
+ register.
+
+ We can achieve these conditions by the following trick in the
+ Alpha code generator. Each instruction in the sequence will write
+ to a different temporary; this is guaranteed by the translation from
+ ML-RISC. At the beginning of the sequence, we will put a special
+ pseudo-instruction (we call it \verb|DEFFREG|) that ``defines''
+ the destination
+ register of the arithmetic instruction. If there are $K$ arithmetic
+ instructions in the sequence, then we'll insert $K$
+ \verb|DEFFREG| instructions
+ all at the beginning of the sequence.
+ Then, each arithop will not only ``define'' its destination temporary
+ but will ``use'' it as well. When all these instructions are fed to
+ the liveness analyzer, the resulting interference graph will then
+ have interference edges satisfying conditions 2 and 3 above.
+
+ Of course, \verb|DEFFREG| doesn't actually generate any code. In our model
+ of the Alpha, every instruction generates exactly 4 bytes of code
+ except the ``span-dependent'' ones. Therefore, we'll specify \verb|DEFFREG|
+ as a span-dependent instruction whose minimum and maximum sizes are zero.
+
+ At the moment, we do not group arithmetic operations into sequences;
+ that is, each arithop will be preceded by a single \verb|DEFFREG| and
+ followed by a \verb|TRAPB|. To avoid the cost of all those \verb|TRAPB|'s,
+ we should improve this when we have time. Warning: Don't put more
+ than 31 instructions in the sequence, because they're all required
+ to write to different destination registers!
+
+ What about multiple traps? For example, suppose a sequence of
+ instructions produces an Overflow and a Divide-by-Zero exception?
+ ML would like to know only about the earliest trap, but the hardware
+ will report \emph{BOTH} traps to the operating system. However, as long
+ as the rules above are followed (and the software-completion versions
+ of the arithmetic instructions are used), the operating system will
+ have enough information to know which instruction produced the
+ trap. It is very probable that the operating system will report \emph{ONLY}
+ the earlier trap to the user process, but I'm not sure.
+
+ For a hint about what the operating system is doing in its own
+ trap-handler (with software completion), see section 6.3.2 of
+ ``\emph{OpenVMS Alpha Software}'' (Part II of the Alpha Architecture
+ Manual). This stuff should apply to Unix (OSF1) as well as VMS.
diff --git a/MLRISC/Doc/latex/annotations.tex b/MLRISC/Doc/latex/annotations.tex
new file mode 100644
index 0000000..4998fa0
--- /dev/null
+++ b/MLRISC/Doc/latex/annotations.tex
@@ -0,0 +1,33 @@
+\section{Annotations}
+
+\subsection{Overview}
+A compiler front-end has to propagate information to
+the back-end. An optimization phase may have to leave behind information
+at various places of the IR so that other phases can reuse such information.
+MLRISC uses the \newdef{annotations}
+mechanism for these functions.
+Individual instructions, basic blocks, and flow graph edges
+can each have one or more annotations attached.
+
+The basic MLRISC system understands many annotations. Some examples are:
+\begin{description}
+ \item[COMMENT]
+ these can be used to attach comments. If attached to
+ an instruction, the assemblers will output
+ them as part of their assembly output.
+ \item[BRANCH\_PROB]
+ these can be attached to a branch instruction to indicate
+ the probability with which it is taken.
+ \item[EXECUTION\_FREQ]
+ these can be attached to a basic block to indicate
+ its expected execution frequency
+\end{description}
+
+\subsection{Details}
+The primitive annotations datatype is defined
+to have this \mlrischref{library/annotations.sig}{signature}.
+In addition, MLRISC predefines a few primitive annotations that are
+recognized by the core system. This signature is
+\mlrischref{instructions/mlriscAnnotations.sig}{MLRISC\_ANNOTATIONS}.
+More detailed documentation can be found in this
+\href{http://cm.bell-labs.com/cm/cs/what/smlnj/compiler-notes/annotations.ps}{paper}.
diff --git a/MLRISC/Doc/latex/asm.tex b/MLRISC/Doc/latex/asm.tex
new file mode 100644
index 0000000..58ce07a
--- /dev/null
+++ b/MLRISC/Doc/latex/asm.tex
@@ -0,0 +1,60 @@
+\section{Assemblers}
+
+\subsubsection{Overview}
+Assemblers in MLRISC satisfy the signature
+\mlrischref{emit/instruction-emitter.sig}{INSTRUCTION\_EMITTER},
+which is defined as:
+\begin{SML}
+signature INSTRUCTION_EMITTER =
+sig
+ structure I : \href{instructions.html}{INSTRUCTIONS}
+ structure C : \href{cells.html}{CELLS}
+ structure S : \href{streams.html}{INSTRUCTION_STREAM}
+ structure P : \href{pseudo-ops.html}{PSEUDO_OPS}
+ sharing I.C = C
+ sharing S.P = P
+
+ val makeStream : Annotations.annotations ->
+ ((int -> int) -> I.instruction -> unit,
+ unit,'b,'c,'d,'e) S.stream
+end
+\end{SML}
+
+The function \sml{makeStream} returns an instruction stream.
+By default the output is bound to the stream \sml{AsmStream.asmOutStream}
+defined in the structure
+\mlrischref{emit/asmStream.sml}{AsmStream} at creation time.
+
+The structure \sml{AsmStream} satisfies the following signature.
+\begin{SML}
+signature ASM_STREAM = sig
+ val asmOutStream : TextIO.outstream ref
+ val withStream : TextIO.outstream -> ('a -> 'b) -> 'a -> 'b
+end
+\end{SML}
+\subsubsection{Redirecting the Output}
+It is possible to redirect the output of an instruction stream.
+For example, the following statement
+\begin{SML}
+ val asm = makeStream []
+\end{SML}
+binds the output of \sml{asm} to \sml{AsmStream.asmOutStream}, which
+by default is just \sml{TextIO.stdOut}. On the other hand, the
+statement
+\begin{SML}
+ val asm = AsmStream.withStream mystream makeStream []
+\end{SML}
+binds the output of asm to \sml{mystream}.
+
+\subsubsection{More Details}
+
+Assemblers are automatically generated by the
+\href{mlrisc-md.html}{MDGen} tool. Some specific generated
+assemblers are listed below:
+\begin{enumerate}
+ \item \mlrischref{sparc/emit/sparcAsm.sml}{Sparc}
+ \item \mlrischref{hppa/emit/hppaAsm.sml}{Hppa}
+ \item \mlrischref{alpha/emit/alphaAsm.sml}{Alpha}
+ \item \mlrischref{ppc/emit/ppcAsm.sml}{Power PC}
+ \item \mlrischref{x86/emit/x86Asm.sml}{X86}
+\end{enumerate}
diff --git a/MLRISC/Doc/latex/availability.tex b/MLRISC/Doc/latex/availability.tex
new file mode 100644
index 0000000..9249c02
--- /dev/null
+++ b/MLRISC/Doc/latex/availability.tex
@@ -0,0 +1,33 @@
+\section{How to Obtain MLRISC}
+
+There are a few ways to obtain the MLRISC system.
+\begin{enumerate}
+\item
+An old version of MLRISC is available from
+\externhref{http://cm.bell-labs.com/cm/cs/what/smlnj/doc/MLRISC/quick-tour/index.html}{this link}.
+This version is stable but very out-dated, and does
+not contain the most up-to-date features.
+\item
+New experimental versions are available from the
+\externhref{http://cm.bell-labs.com/cm/cs/what/smlnj/software.html}{SML/NJ software page} as part of the SML/NJ compiler releases.
+These versions are relatively stable, but
+do not include the entire MLRISC source tree.
+\item \href{mailto:leunga@cs.nyu.edu}{Allen}
+keeps an up-to-date version of MLRISC at NYU for private use.
+This version includes everything but is under constant change, so beware!
+To access the CVS repository, set your \sml{CVSROOT} environment variable
+to
+\begin{verbatim}
+ :pserver:mlrisc@react-ilp.cs.nyu.edu:/home/leunga/mlrisc
+\end{verbatim}
+and checkout the repository using
+\begin{verbatim}
+ cvs co MLRISC++
+\end{verbatim}
+The password to use is \sml{mlrisc}.
+\item
+Generally speaking, you can get the latest version of MLRISC by asking
+\href{mailto:george@research.bell-labs.com}{Lal}.
+\end{enumerate}
+MLRISC is \newdef{free, open source} software, and is released under the
+\href{http://cm.bell-labs.com/cm/cs/what/smlnj/license.html}{SML/NJ license}.
diff --git a/MLRISC/Doc/latex/backend-opt.tex b/MLRISC/Doc/latex/backend-opt.tex
new file mode 100644
index 0000000..d947242
--- /dev/null
+++ b/MLRISC/Doc/latex/backend-opt.tex
@@ -0,0 +1,15 @@
+\section{Back End Optimizations}
+
+ Once MLRisc trees have been generated, they are passed into a module
+ that generates a flowgraph of target machine instructions. Again,
+ this module and all subsequent optimization phases have been
+ specialized to the front end.
+ \image{Back end optimizations}{pictures/png/optimization.png}{align=right}
+ Nearly all
+ instruction selection modules provided by MLRISC use a simple tree
+ pattern matching algorithm rather than the more heavy weight BURG
+ tools --- including the x86 \begin{color}{#580000} It is important to
+ emphasize that all optimizations are performed on the flowgraph of
+ target machine instructions and \emph{not} MLRisc
+ immediate IR. \end{color} There is complete flexibility in the order,
+ and nature of the optimizations performed.
diff --git a/MLRISC/Doc/latex/cells.tex b/MLRISC/Doc/latex/cells.tex
new file mode 100644
index 0000000..7ea0ce9
--- /dev/null
+++ b/MLRISC/Doc/latex/cells.tex
@@ -0,0 +1,146 @@
+\section{Cells}
+
+MLRISC uses
+the \mlrischref{instructions/cells.sig}{CELLS}
+interface to define all readable/writable resources
+in a machine architecture, or \emph{cells}.
+The types defined herein are:
+\begin{itemize}
+ \item \sml{cellkind} -- different classes of cells are assigned
+ different cellkinds. The following cellkinds should be present
+ \begin{itemize}
+ \item \sml{GP} -- general purpose registers.
+ \item \sml{FP} -- floating point registers.
+ \item \sml{CC} -- condition code registers.
+ \end{itemize}
+ In addition, the cellkinds \sml{MEM} and \sml{CTRL}
+ should also be defined. These are used for representing
+ memory based data dependence and control dependence.
+ \begin{itemize}
+ \item \sml{MEM} -- memory
+ \item \sml{CTRL} -- control dependence
+ \end{itemize}
+ \item \sml{regmap} -- \href{regmap.html}{register map}
+ \item \sml{cellset} -- a cellset represents a set of cells. This
+ type can be used to denote live-in/live-out information. Cellsets are
+ implemented as immutable abstract types.
+\end{itemize}
+
+These core definitions are defined in the following signature
+\begin{SML}
+signature \mlrischref{instructions/cells.sig}{CELLS\_BASIS} =
+sig
+ eqtype cellkind
+ type cell = int
+ type regmap = cell Intmap.intmap
+ exception Cells
+
+ val cellkinds : cellkind list
+ val cellkindToString : cellkind -> string
+ val firstPseudo : cell
+ val Reg : cellkind -> int -> cell
+ val GPReg : int -> cell
+ val FPReg : int -> cell
+ val cellRange : cellkind -> {low:int, high:int}
+ val newCell : cellkind -> 'a -> cell
+ val cellKind : cell -> cellkind
+ val updateCellKind : cell * cellkind -> unit
+ val numCell : cellkind -> unit -> int
+ val maxCell : unit -> cell
+ val newReg : 'a -> cell
+ val newFreg : 'a -> cell
+ val newVar : cell -> cell
+ val regmap : unit -> regmap
+ val lookup : regmap -> cell -> cell
+ val reset : unit -> unit
+end
+\end{SML}
+
+\begin{itemize}
+ \item\sml{cellkinds} -- this is a list of all the cellkinds defined in the
+architecture
+ \item\sml{cellkindToString} -- this function maps a cellkind into its name
+ \item\sml{firstPseudo} -- MLRISC numbered physical resources
+ in the architecture from 0 to firstPseudo-1.
+ This is the first usable virtual register number.
+ \item\sml{Reg} -- This function maps the $i$th physical
+ resource of a particular cellkind to its internal encoding used by MLRISC.
+ Note that all resources in MLRISC are named uniquely.
+ \item\sml{GPReg} -- abbreviation for \sml{Reg GP}
+ \item\sml{FPReg} -- abbreviation for \sml{Reg FP}
+ \item \sml{cellRange} -- this returns a range \sml{{low, high}}
+ when given a cellkind, which denotes the range of physical resources
+ \item \sml{newCell} -- This function returns a new virtual register
+ of a particular cellkind.
+ \item \sml{newReg} -- abbreviation for \sml{newCell GP}
+ \item \sml{newFreg} -- abbreviation for \sml{newCell FP}
+ \item \sml{cellKind} -- When given a cell number, this returns its
+ cellkind. Note that this feature is not enabled by default.
+ \item \sml{updateCellKind} -- updates the cellkind of a cell.
+ \item \sml{numCell} -- returns the number of virtual cells allocated for one cellkind.
+ \item \sml{maxCell} -- returns the next virtual cell id.
+ \item \sml{newVar} -- given a cell id, return a new cell id of
+ the same cellkind.
+ \item \sml{regmap} -- This function returns a new empty regmap
+ \item \sml{lookup} -- This converts a regmap into a lookup function.
+ \item \sml{reset} -- This function resets all counters associated
+with all virtual cells.
+\end{itemize}
+
+\begin{SML}
+signature CELLS = sig
+ include CELLS_BASIS
+ val GP : cellkind
+ val FP : cellkind
+ val CC : cellkind
+ val MEM : cellkind
+ val CTRL : cellkind
+ val toString : cellkind -> cell -> string
+ val stackptrR : cell
+ val asmTmpR : cell
+ val fasmTmp : cell
+ val zeroReg : cellkind -> cell option
+
+ type cellset
+
+ val empty : cellset
+ val addCell : cellkind -> cell * cellset -> cellset
+ val rmvCell : cellkind -> cell * cellset -> cellset
+ val addReg : cell * cellset -> cellset
+ val rmvReg : cell * cellset -> cellset
+ val addFreg : cell * cellset -> cellset
+ val rmvFreg : cell * cellset -> cellset
+ val getCell : cellkind -> cellset -> cell list
+ val updateCell : cellkind -> cellset * cell list -> cellset
+
+ val cellsetToString : cellset -> string
+ val cellsetToString' : (cell -> cell) -> cellset -> string
+
+ val cellsetToCells : cellset -> cell list
+end
+\end{SML}
+
+\begin{itemize}
+ \item \sml{toString} -- convert a cell id of a certain cellkind into
+its assembly name.
+ \item \sml{stackptrR} -- the cell id of the stack pointer register.
+ \item \sml{asmTmpR} -- the cell id of the assembly temporary
+ \item \sml{fasmTmp} -- the cell id of the floating point temporary
+ \item \sml{zeroReg} -- given the cellkind, returns the cell id of the
+ source that always holds the value of zero, if there is any.
+ \item \sml{empty} -- an empty cellset
+ \item \sml{addCell} -- inserts a cell into a cellset
+ \item \sml{rmvCell} -- remove a cell from a cellset
+ \item \sml{addReg} -- abbreviation for \sml{addCell GP}
+ \item \sml{rmvReg} -- abbreviation for \sml{rmvCell GP}
+ \item \sml{addFreg} -- abbreviation for \sml{addCell FP}
+ \item \sml{rmvFreg} -- abbreviation for \sml{rmvCell FP}
+ \item \sml{getCell} -- lookup all cells of a particular cellkind from
+the cellset
+ \item \sml{updateCell} -- replace all cells of a particular cellkind
+from the cellset.
+ \item \sml{cellsetToString} -- pretty print a cellset
+ \item \sml{cellsetToString'} -- pretty print a cellset, but first
+apply a regmap function.
+ \item \sml{cellsetToCells} -- convert a cellset into list form.
+\end{itemize}
diff --git a/MLRISC/Doc/latex/cluster.tex b/MLRISC/Doc/latex/cluster.tex
new file mode 100644
index 0000000..bb820f9
--- /dev/null
+++ b/MLRISC/Doc/latex/cluster.tex
@@ -0,0 +1,53 @@
+\section{Cluster}
+
+A \newdef{cluster}
+represents a compilation unit in linearized form,
+and contains information about the control flow, global annotations,
+block and edge execution frequencies, and live-in/live-out information.
+
+Its signature is:
+\begin{SML}
+signature FLOWGRAPH = sig
+ structure C : \href{cells.html}{CELLS}
+ structure I : \href{instructions.html}{INSTRUCTIONS}
+ structure P : \href{pseudo-ops.html}{PSEUDO_OPS}
+ structure W : \href{freq.html}{FREQ}
+ sharing I.C = C
+
+ datatype block =
+ PSEUDO of P.pseudo_op
+ | LABEL of Label.label
+ | BBLOCK of
+ \{ blknum : int,
+ freq : W.freq ref,
+ annotations : Annotations.annotations ref,
+ liveIn : C.cellset ref,
+ liveOut : C.cellset ref,
+ succ : edge list ref,
+ pred : edge list ref,
+ insns : I.instruction list ref
+ \}
+ | ENTRY of
+ \{blknum : int, freq : W.freq ref, succ : edge list ref\}
+ | EXIT of
+ \{blknum : int, freq : W.freq ref, pred : edge list ref\}
+ withtype edge = block * W.freq ref
+
+ datatype cluster =
+ CLUSTER of \{
+ blocks: block list,
+ entry : block,
+ exit : block,
+ regmap: C.regmap,
+ blkCounter : int ref,
+ annotations : Annotations.annotations ref
+ \}
+end
+\end{SML}
+
+Clusters are used in
+\href{span-dep.html}{span dependency resolution},
+\href{delayslots.html}{delay slot filling},
+\href{asm.html}{assembly},
+and \href{mc.html}{machine code}
+output, since these phases require the code laid out in linearized form.
diff --git a/MLRISC/Doc/latex/compiler-graphs.tex b/MLRISC/Doc/latex/compiler-graphs.tex
new file mode 100644
index 0000000..52322da
--- /dev/null
+++ b/MLRISC/Doc/latex/compiler-graphs.tex
@@ -0,0 +1,434 @@
+\section{Basic Compiler Graphs}
+
+\subsection{Introduction}
+In this section we describe the set of core compiler specific graphs and
+algorithms implemented in MLRISC.
+Most of these algorithms are parameterized with respect
+to the actual intermediate representation, and as such they
+do not provide many facilities that are provided by higher abstraction
+layers, such as in \href{mlrisc-ir.html}{MLRISC IR},
+or in \href{SSA.html}{SSA}.
+
+\subsubsection{Dominator/Post-dominator Trees}
+\newdef{Dominance}
+is a fundamental concept in compiler optimizations.
+Node $A$ \emph{dominates} $B$
+iff all paths from the start node
+to $B$ intersect $A$. A dual notion is the concept of
+\emph{post-dominance}:
+$A$ \newdef{post-dominates} $B$ iff all paths from $B$ to the stop node
+intersect $A$. A (post-)dominator tree can be used
+to summarize the dominance/post-dominance relationship.
+
+\begin{SML}
+ functor \mlrischref{ir/dominator.sml}{DominatorTree}
+ (GraphImpl : GRAPH_IMPLEMENTATION) : DOMINATOR_TREE
+\end{SML}
+ The functor implements dominator analysis and
+creates a dominator/post-dominator tree from a graph $G$. A dominator tree is implemented as a graph
+with the following definition:
+\begin{SML}
+ signature \mlrischref{ir/dominator.sig}{DOMINATOR_TREE} = sig
+ exception Dominator
+ datatype 'n dom_node =
+ DOM of \{ node : 'n, level : int, preorder : int, postorder : int \}
+ type ('n,'e,'g) dom_info
+ type ('n,'e,'g) dominator_tree = ('n dom_node,unit,('n,'e,'g) dom_info) graph
+ type ('n,'e,'g) postdominator_tree = ('n dom_node,unit,('n,'e,'g) dom_info) graph
+\end{SML}
+
+We annotated each node in
+a dominator tree with three extra fields of information, which
+is useful for other algorithms:
+\begin{itemize}
+ \item\sml{level} is the nesting level of the tree. The root
+ node has level 0, children of the root have level 1 and so on.
+ \item\sml{preorder} is the preorder numbering of a node
+ \item\sml{postorder} is the postorder numbering of a node.
+\end{itemize}
+
+To create a dominator tree and a postdominator tree
+from a graph, the following function should be called.
+\begin{SML}
+ val dominator_trees : ('n,'e,'g) graph ->
+ ('n,'e,'g) dominator_tree * ('n,'e,'g) postdominator_tree
+\end{SML}
+We use the algorithm of Tarjan and Lengauer, which
+runs in time $O(|V+E|\alpha(|V+E|))$ where $\alpha$ is the functional
+inverse of the Ackermann function.
+
+To perform many common queries on a dominator tree, we first
+call the function \sml{methods} to obtain a method object.
+\begin{SML}
+ val methods : ('n,'e,'g) dominator_tree -> dominator_methods
+\end{SML}
+
+The methods are packed into the following type:
+\begin{SML}
+ type dominator_methods =
+ \{ dominates : node_id * node_id -> bool,
+ immediately_dominates : node_id * node_id -> bool,
+ strictly_dominates : node_id * node_id -> bool,
+ postdominates : node_id * node_id -> bool,
+ immediately_postdominates : node_id * node_id -> bool,
+ strictly_postdominates : node_id * node_id -> bool,
+ control_equivalent : node_id * node_id -> bool,
+ idom : node_id -> node_id, $(* ~1 if none *)$
+ idoms : node_id -> node_id list,
+ doms : node_id -> node_id list,
+ ipdom : node_id -> node_id, $(* ~1 if none *)$
+ ipdoms : node_id -> node_id list,
+ pdoms : node_id -> node_id list,
+ dom_lca : node_id * node_id -> node_id,
+ pdom_lca : node_id * node_id -> node_id,
+ dom_level : node_id -> int,
+ pdom_level : node_id -> int,
+ control_equivalent_partitions : unit -> node_id list list
+ \}
+\end{SML}
+
+The query methods are as follows:
+\begin{methods}
+ dominates($a,b$) & returns true iff $a$ dominates $b$ \\
+ immediately\_dominates($a,b$) & returns true iff $a$ immediately dominates $b$ \\
+ strictly\_dominates($a,b$) & returns true iff $a$ strictly dominates $b$ \\
+ postdominates($a,b$) & returns true iff $a$ post-dominates $b$ \\
+ immediately\_postdominates($a,b$) & returns true iff $a$ immediately post-dominates $b$ \\
+ strictly\_postdominates($a,b$) & returns true iff $a$ strictly post-dominates $b$ \\
+ control\_equivalent($a,b$) &
+ returns true iff $a$ dominates $b$ and vice versa \\
+ idom($a$) & returns the immediate dominator of $a$, or $-1$ if none exists \\
+ idoms($a$) & returns all nodes that $a$ immediately dominates \\
+ doms($a$) & returns all nodes that $a$ dominates (including $a$ itself) \\
+ ipdom($a$) & returns the immediate post-dominator of $a$, or $-1$ if none exists \\
+ ipdoms($a$) & returns all nodes that $a$ immediately post-dominates \\
+ pdoms($a$) & returns all nodes that $a$ post-dominates (including $a$ itself) \\
+ dom\_lca($a,b$) & returns the least common ancestor of $a$ and $b$ in
+ the dominator tree \\
+ pdom\_lca($a,b$) & returns the least common ancestor of $a$ and $b$
+ in the post-dominator tree \\
+ dom\_level($a$) & returns the nesting level of $a$ in the dominator tree \\
+ pdom\_level($a$) & returns the nesting level of $a$ in the post-dominator
+ tree \\
+ control\_equivalent\_partitions & partitions the graph into
+ a set of control equivalent nodes.
+\end{methods}
+
+The methods \sml{dom_lca}, \sml{pdom_lca} and
+\sml{control_equivalent_partitions} execute in $O(n)$ time, where
+$n$ is the size of the dominator tree. The other methods run in $O(1)$ time.
+
+\subsubsection{Control Dependence Graph}
+Given two nodes $A$ and $B$ in a control flow graph $G$,
+we say that $B$ is \newdef{control dependent} on $A$ iff
+\begin{itemize}
+ \item $B$ post-dominates a successor of $A$
+ \item $B$ does not strictly post-dominate $A$
+\end{itemize}
+Intuitively, $B$ is control dependent on $A$ means that
+some path in the program that goes through $A$ can bypass $B$,
+and furthermore, $A$ is the point at which this divergence can occur.
+Control dependence is used in various kinds of analyses and optimizations in
+a compiler, such as code motion and global scheduling~\cite{bernstein-rodeh}.
+
+To build a control dependence graph, the functor
+\sml{ControlDependenceGraph} can be used:
+\begin{SML}
+ signature \mlrischref{ir/cdg.sig}{CONTROL_DEPENDENCE_GRAPH} = sig
+ type ('n,'e,'g) cdg = ('n,'e,'g) graph
+
+ val control_dependence_graph :
+ ('e -> bool) ->
+ ('n,'e,'g) dominator_tree *
+ ('n,'e,'g) postdominator_tree ->
+ ('n,'e,'g) cdg
+ end
+ functor \mlrischref{ir/cdg.sml}{ControlDependenceGraph}
+ (structure Dom : DOMINATOR_TREE
+ structure GraphImpl : GRAPH_IMPLEMENTATION
+ ) : CONTROL_DEPENDENCE_GRAPH
+\end{SML}
+The control dependence graph is a subcomponent of the
+program dependence graph commonly used in
+modern compiler optimizations.
+
+\subsubsection{Dominance Frontiers}
+
+Many algorithms involving the notion of control dependence or dominance
+can be rephrased in terms of \newdef{dominance frontiers}.
+A node $A$ is in the dominance frontiers of $B$ iff
+$B$ dominates a predecessor of $A$ but $B$ does not strictly-dominate $A$.
+We denote this as $A \in DF(B)$.
+The dual notion of \newdef{post-dominance frontiers} can be defined
+analogously using the post-dominator tree\footnote{Control dependence
+can be defined in terms of post-dominance frontiers.}.
+
+\begin{SML}
+ functor \mlrischref{ir/dominance-frontier.sml}{DominanceFrontiers}(Dom : DOMINATOR_TREE) : DOMINANCE_FRONTIERS
+\end{SML}
+The functor \sml{DominanceFrontiers} can be used to
+compute all the dominance frontiers of all the nodes in a graph.
+It has the following signature.
+
+\begin{SML}
+ signature \mlrischref{ir/dominance-frontier.sig}{DOMINANCE_FRONTIERS} = sig
+ structure Dom : DOMINATOR_TREE
+ type dominance_frontiers = node_id list array
+ val DFs : ('n,'e,'g) Dom.dominator_tree -> dominance_frontiers
+ end
+\end{SML}
+
+\subsubsection{Iterated Dominance Frontiers}
+
+\newdef{Iterated dominance frontiers} (denoted as $DF^+$) are defined
+as the least fixed point of iterating the operation $DF$. Formally,
+define the dominance frontiers on a set $S$ as follows:
+\[
+ DF(S) \defas \Union_{A \in S} DF(A)
+\]
+Define iteration of $DF$, denoted as $DF^n$, as follows:
+\begin{eqnarray*}
+ DF^1(S) & \defas & DF(S) \\
+ DF^{n+1}(S) & \defas & DF(S \union DF^n(S)) \\
+\end{eqnarray*}
+The iterated dominance frontiers $DF^+(S)$ on a set $S$ are defined as
+the limit:
+\[
+ DF^+(S) \defas \lim_{n \to \infty} DF^n(S)
+\]
+
+Iterated dominance frontiers of a set $S$ can be computed in
+time $O(|S|+|V|+|E|)$ using the
+algorithm by Sreedhar and Gao~\cite{linear-time-IDF}\footnote{
+In practice it is often sub-linear in $|V|+|E|$.}.
+
+\begin{SML}
+ functor \mlrischref{ir/djgraph.sml}{DJGraph}(Dom : DOMINATOR_TREE) : DJ_GRAPH
+\end{SML}
+The functor \sml{DJGraph} implements this algorithm.
+It satisfies the signature below:
+\begin{SML}
+ signature \mlrischref{ir/djgraph.sig}{DJ_GRAPH} = sig
+ structure Dom : DOMINATOR_TREE
+ type ('n,'e,'g) dj_graph = ('n,'e,'g) Dom.dominator_tree
+ val dj_graph : ('n,'e,'g) dj_graph ->
+ \{ DF : node_id -> node_id list,
+ IDF : node_id -> node_id list,
+ IDFs : node_id list -> node_id list
+ \}
+ end
+\end{SML}
+The function \sml{dj_graph} takes a dominator tree and returns
+three query methods for computing dominance and iterated dominance frontiers.
+Method \sml{DF} computes $DF(v)$ for a single node $v$.
+Method \sml{IDF} computes the $DF^+(v)$, and method
+\sml{IDFs} computes $DF^+(S)$ when given a set of node ids.
+The dominator tree must not be updated while these operations
+are being performed.
+
+Sreedhar's original algorithm is phrased in terms of the
+DJ-graph, which is a fusion of the dominator tree
+with its underlying flowgraph. Our variant operates on the
+dominator tree and the flowgraph at the same time, without
+building an intermediate data structure.
+
+Iterated dominance frontiers are used
+in many algorithms that deal with the notion of dominance.
+For example, our SSA construction algorithm uses iterated
+dominance frontiers to identify confluent points in the program
+where $\phi$-functions are to be placed.
+
+\subsubsection{Loop Nesting Tree}
+
+A \newdef{natural loop} $L$ in a graph is a maximal
+strongly connected component
+such that all nodes in $L$ are dominated by a single node $h$, called
+the \newdef{loop header}. Loops tend to form good optimization candidates
+and consequently \newdef{loop detection} is an essential task in a compiler.
+The functor
+\begin{SML}
+ functor \mlrischref{ir/loop-structure.sml}{LoopStructure}
+ (structure GraphImpl : GRAPH_IMPLEMENTATION
+ structure Dom : DOMINATOR_TREE
+ ) : LOOP_STRUCTURE
+\end{SML}
+recognizes all natural loops in a graph and builds a
+\newdef{loop nesting tree}
+that describes the loop nesting relationship between loops.
+
+\begin{SML}
+ signature \mlrischref{ir/loop-structure.sig}{LOOP_STRUCTURE} = sig
+ structure Dom : DOMINATOR_TREE
+ datatype ('n,'e,'g) loop =
+ LOOP of \{ nesting : int,
+ header : node_id,
+ loop_nodes : node_id list,
+ backedges : 'e edge list,
+ exits : 'e edge list
+ \}
+
+ type ('n,'e,'g) loop_info
+ type ('n,'e,'g) loop_structure = (('n,'e,'g) loop,unit, ('n,'e,'g) loop_info) graph
+
+ val loop_structure : ('n,'e,'g) Dom.dominator_tree -> ('n,'e,'g) loop_structure
+ val nesting_level : ('n,'e,'g) loop_structure -> node_id array
+ val header : ('n,'e,'g) loop_structure -> node_id array
+ end
+\end{SML}
+
+Our algorithm computes the loop nesting tree in time
+$O((|V|+|E|)\alpha(|V|+|E|))$.
+Each node in this tree represents a loop in the flowgraph, except the
+root of the tree, which represents the entire graph.
+Given a flowgraph $G$, the root
+of the loop nesting tree is defined to be the sole vertex in
+\sml{#entry} $G$. Other nodes in the tree
+are indexed by the loop header node ids.
+
+Loop detection classifies each loop and for
+each loop $L$, the following information is obtained:
+\begin{itemize}
+ \item An integer \sml{nesting}. The root of the tree has nesting
+ depth 0. The top level loops have nesting depth 1, etc.
+ \item The node id of the loop \sml{header} $h$.
+ \item A set of \sml{loop_nodes}. Loop nodes are
+ nodes that are in the strongly connected
+ component $L$, but excluding the header $h$
+ and all nodes that are part of any nested loops.
+ Thus all nodes are uniquely partitioned into header nodes and
+ loop nodes, and loop nodes are further partitioned into different
+ sets according to which headers they are immediately nested under.
+ \item A set of \sml{backedges}. A back-edge is an
+ edge that targets the header $h$ and originates from a loop node
+ in $L$.
+ \item A set of loop \sml{exits}. An exit-edge is an edge
+ that originates from a loop node within $L$ and
+ targets a node outside of $L$. Note that this set does not include
+ any exit-edges that are contained in loops nested in $L$ but
+ target a node outside of $L$.
+\end{itemize}
+
+\subsubsection{Static Single Assignment}
+
+An SSA construction algorithm based on~\cite{SSA,Briggs-SSA,linear-time-IDF}
+is implemented in the following functor:
+\begin{SML}
+ functor \mlrischref{ir/ssa.sml}{StaticSingleAssignmentForm}
+ (Dom : DOMINATOR_TREE) : STATIC_SINGLE_ASSIGNMENT_FORM
+\end{SML}
+
+SSA-based optimizations in MLRISC
+are actually implemented on top of a
+high-level SSA layer described in Section~\ref{sec:ssa}.
+So it is not necessary to use this module directly. Nevertheless,
+there can be situations in which this module can be specialized in other
+ways; for example, in the construction of sparse evaluation graphs.
+
+\begin{SML}
+ signature \mlrischref{ir/ssa.sig}{STATIC_SINGLE_ASSIGNMENT_FORM} = sig
+ structure Dom : DOMINATOR_TREE
+ type var = int
+ type phi = var * var * var list $(* orig def/def/uses *)$
+ type renamer = \{defs : var list, uses: var list\} ->
+ \{defs : var list, uses: var list\}
+ type copy = \{dst : var list, src: var list\} -> unit
+
+ val compute_ssa :
+ ('n,'e,'g) Dom.dominator_tree ->
+ \{ max_var : var,
+ defs : 'n node -> var list,
+ is_live : var * int -> bool,
+ rename_var : var -> var,
+ rename_stmt : \{rename:renamer,copy:copy\} -> 'n node -> unit,
+ insert_phi : \{block : 'n node,
+ in_edges : 'e edge list,
+ phis : phi list
+ \} -> unit
+ \} -> unit
+ end
+\end{SML}
+
+This module defines the function \sml{compute_ssa}, which
+constructs an SSA graph. It requires
+the following information from the client:
+\begin{itemize}
+\item A dominator tree of the flowgraph.
+\item \sml{max_var} -- the maximum variable id (integer) that exists
+in the flowgraph. All variables are assumed to be indexed by non-negative
+ integers.
+\item \sml{defs}($X$) -- a function that returns $defs(X)$,
+i.e.~the set of variable names defined in block $X$.
+If a minimal SSA form is desired, this set should include all the definitions
+in $X$. If a pruned SSA form is required, this set should
+include only the set of names that are live-out in $X$.
+\item \sml{is_live}($v,X$) -- a function that determines if
+variable $v$ is live-in into block $X$. If not, a $\phi$-function will
+not be placed in $X$. For example, to compute
+the minimal-SSA form, this function should always return true.
+\item \sml{rename_var}($v$) -- a function that returns a new
+unique name for variable $v$.
+\item \sml{rename_stmt} -- a function of type
+ \sml{{rename:renamer,copy:copy} -> 'n node -> unit} where
+\begin{SML}
+ type renamer = \{defs : var list, uses: var list\} ->
+ \{defs : var list, uses: var list\}
+ type copy = \{dst : var list, src: var list\} -> unit
+\end{SML}
+Function \sml{rename_stmt} is called for each block
+in the flowgraph in the order of the dominator tree, and
+is responsible for renaming all the variables in $X$ by
+calling the functions \sml{renamer} or \sml{copy}.
+Function \sml{renamer} renames all definitions and uses of
+a statement, while function \sml{copy} renames
+a set of parallel assignments.
+\item \sml{insert_phi}($X$,$es$,$phis$) --
+ a function that inserts a set of
+ $\phi$-definitions $phis$ in block $X$, where $es$
+ is the list of control flow edges that merge into block $X$.
+\end{itemize}
+
+\subsubsection{IDEFS/IUSE sets}
+Reif and Tarjan define the following useful notions for
+computing approximate birth-points for expressions, which in turn
+can be used to drive other optimizations.
+Given a node $X$, let $idom(X)$ denote the immediate dominator of $X$.
+Let $def(X)$ ($use(X)$) denote all the definitions (uses) in $X$.
+Given a path $p \equiv v_1\ldots v_n$, define $def(p)$ ($use(p)$) as
+\begin{eqnarray*}
+ def(v_1\ldots v_n) & \equiv &\union_{i \in 1 \ldots n} def(v_i) \\
+ use(v_1\ldots v_n) & \equiv &\union_{i \in 1 \ldots n} use(v_i)
+\end{eqnarray*}
+
+Let $P(X)$ denote all the paths from $idom(X)$ to $X$
+that do not cross $idom(X)$ internally. Then define
+$idef(X)$ ($iuse(X)$) as:
+\begin{eqnarray*}
+ idef(X) & \equiv & \Union_{idom(X) v_1 \ldots v_n X \in P(X)}
+ def(v_1\ldots v_n) \\
+ iuse(X) & \equiv & \Union_{idom(X) v_1 \ldots v_n X \in P(X)}
+ use(v_1\ldots v_n)
+\end{eqnarray*}
+The sets $ipostdef(X)$ and $ipostuse(X)$ are defined analogously
+using the postdominator tree.
+
+\begin{SML}
+ signature \mlrischref{ir/idefs2.sig}{IDEFS} = sig
+ type var = int
+ val compute_idefs :
+ \{def_use : 'n Graph.node -> var list * var list,
+ cfg : ('n,'e,'g) Graph.graph
+ \} ->
+ \{ idefuse : unit -> (RegSet.regset * RegSet.regset) Array.array,
+ ipostdefuse : unit -> (RegSet.regset * RegSet.regset) Array.array
+ \}
+ end
+ structure \mlrischref{ir/idefs2.sml}{IDefs} : IDEFS
+\end{SML}
+Structure \sml{IDefs} implements the function
+\sml{compute_idefs} for computing
+the $idef$, $iuse$, $ipostdef$ and $ipostuse$ sets of a control flow
+graph. It takes as arguments a flowgraph and a function \sml{def_use}, which
+takes a graph node and returns the def/use sets of the node.
+It returns two functions \sml{idefuse} and \sml{ipostdefuse} which
+compute the $idef/iuse$ and $ipostdef/ipostuse$ sets. These sets
+are returned as arrays indexed by node ids.
diff --git a/MLRISC/Doc/latex/constants.tex b/MLRISC/Doc/latex/constants.tex
new file mode 100644
index 0000000..2d55833
--- /dev/null
+++ b/MLRISC/Doc/latex/constants.tex
@@ -0,0 +1,36 @@
+\section{Client Defined Constants}
+\subsubsection{Introduction}
+MLRISC allows the client to inject abstract
+\newdef{constants} that are resolved
+only at the end of the compilation phase into the instruction stream.
+These constants can be used wherever an integer literal is expected.
+Typical uses are stack frame offsets for spill locations, which are only
+known after register allocation,
+and garbage collection and exception maps, which are resolved only
+when all address calculations have been performed.
+
+\subsubsection{The Details}
+Client defined constants should satisfy the following signature:
+\begin{SML}
+signature \mlrischref{instructions/constant.sig}{CONSTANT} = sig
+ type const
+
+ val toString : const -> string
+ val valueOf : const -> int
+ val hash : const -> word
+ val == : const * const -> bool
+end
+\end{SML}
+
+The methods are:
+\begin{methods}
+ toString & a pretty printing function \\
+ valueOf & returns the value of the constant \\
+ hash & returns the hash value of the constant \\
+ == & compare two constants for identity \\
+\end{methods}
+
+The method \sml{toString} should be implemented in all cases.
+The method \sml{valueOf} is necessary only if machine code generation
+is used. The last two methods, \sml{hash} and \sml{==} are necessary
+only if SSA optimizations are used.
diff --git a/MLRISC/Doc/latex/contributions.tex b/MLRISC/Doc/latex/contributions.tex
new file mode 100644
index 0000000..bc5e56f
--- /dev/null
+++ b/MLRISC/Doc/latex/contributions.tex
@@ -0,0 +1,60 @@
+\section{Contributions}
+ The optimizations provided by MLRISC are at a similar level to
+ those performed by the Impact compiler; several target back ends
+ exist (Dec Alpha, HPPA, Sparc, x86, and PPC); but more importantly, the
+ framework has been demonstrated in \href{systems.html}{real use}
+ for languages with radically different execution models. These include:
+
+ \begin{center}
+ \begin{tabular}{|c|c|} \hline
+ Compiler & Association \\ \hline
+ \begin{color}{#005500}SML/NJ\end{color} & Bell Labs and Princeton\\\hline
+ \begin{color}{#005500}TIL\end{color} & CMU \\ \hline
+ \begin{color}{#005500}Tiger\end{color} & Princeton \\ \hline
+ \begin{color}{#005500}C--\end{color} & OGI \\ \hline
+ \begin{color}{#005500}SML/Regions\end{color} & DIKU \\ \hline
+ \begin{color}{#005500}Moby\end{color} & Bell Labs \\ \hline
+ \end{tabular}
+ \end{center}
+
+ The strength of MLRISC lies in the ability to easily create a high
+ quality code generator for each of these systems. For example:
+
+ \begin{description}
+ \item[Tiger:] Has an execution
+ model very similar to C with stack allocated activation frames,
+ and also maintains static and dynamic chains to support lexical
+ scoping.
+
+ \item[TIL:] Is similar to C in its
+ use of activation frames, however it uses a
+ \emph{typed intermediate language} that
+ supports \emph{almost tag-free}
+ garbage collection. This has severe implications on the
+ interaction of spilling and garbage collection. The set of live
+ variables and their locations, be it registers or frame slots,
+ is recorded in a trace table for a specific program point. When
+ spilling occurs, it is necessary to adjust some of these trace
+ tables to reflect the new locations of live variables.
+
+ \item[SML/NJ:] Has no runtime
+ stack, but stores all execution context in a garbage collected
+ heap. This arrangement imposes special requirements for spilling
+ registers. SML/NJ also does \emph{dynamic linking} --- that is
+ to say, no use is made of a conventional linker, but machine
+ code is generated directly and linked into the interactive
+ environment, dynamically.
+
+ \item[C--:] Is a C-like portable assembly
+ language used as an intermediate language for high-level typed languages,
+ and provides direct compilation support for exceptions and
+ precise garbage collection. In addition, it allows
+ interoperability with C function calls.
+\end{description}
+
+ It is not uncommon for any of these systems to store special global
+ values in dedicated registers, and use their own parameter passing
+ and callee-save conventions. In any language that supports garbage
+ collection, there are also the issues of generating gc type maps,
+ and gc-safety in aggressive optimizations. MLRISC deals with all these
+ important issues by allowing customization of many aspects of the system.
diff --git a/MLRISC/Doc/latex/contributors.tex b/MLRISC/Doc/latex/contributors.tex
new file mode 100644
index 0000000..2c1da87
--- /dev/null
+++ b/MLRISC/Doc/latex/contributors.tex
@@ -0,0 +1,15 @@
+\section{Contributors}
+ \subsubsection{Past}
+ \begin{itemize}
+ \item Florent Guillame (INRIA)
+ \item George C. Necula (CMU)
+ \item Ken Cline (CMU)
+ \item Andrew Bernard (CMU)
+ \item Dino Oliva (NEC)
+ \end{itemize}
+
+\subsubsection{Present}
+ \begin{itemize}
+ \item Allen Leung (NYU)
+ \item Fermin Reig (University of Glasgow)
+ \end{itemize}
diff --git a/MLRISC/Doc/latex/delayslots.tex b/MLRISC/Doc/latex/delayslots.tex
new file mode 100644
index 0000000..2164534
--- /dev/null
+++ b/MLRISC/Doc/latex/delayslots.tex
@@ -0,0 +1,174 @@
+\section{Delay Slot Filling}
+\subsection{ Overview }
+
+ Superscalar architectures such as the Sparc, MIPS, and PA-RISC
+contain delayed branch and/or load instructions.
+Delay slot filling is a necessary
+task of the back end to keep the instruction pipelines busy. To accommodate
+the intricate semantics of branch delay slots in various architectures,
+MLRISC uses the following very general framework for dealing with
+delayed instructions.
+
+\begin{description}
+ \item[Instruction representation]
+ To make it easy to deal with instructions with delay slots, MLRISC allows
+ the following extensions to instruction representations.
+ \begin{itemize}
+ \item Instructions with delay slots may have a
+ \begin{color}{#aa0000}nop\end{color} flag. When this flag is true
+ the delay slot is assumed to be filled with a NOP instruction.
+ \item Instructions with delay slots that can be nullified may have a
+ \begin{color}{#aa0000}nullified\end{color} flag.
+ When this flag is true the branch delay slot is assumed to be
+ nullified.
+ \end{itemize}
+ \item[Nullification semantics]
+ Unfortunately, nullification semantics
+ in architectures vary. In general, MLRISC allows the following
+ additional nullification characteristics to be specified.
+ \begin{itemize}
+ \item Nullification can be specified as illegal; this is needed
+ because some instructions cannot be nullified.
+ \item When nullification is enabled, the semantics of the delay slot
+ instruction may depend on the direction of the branch, and whether
+ a conditional test succeeds.
+ \item Certain classes of instructions may be declared illegal
+ to fit into certain classes of delay slots.
+ \end{itemize}
+\end{description}
+
+For example, conditional branch instructions on the Sparc are defined
+as follows:
+\begin{verbatim}
+ Bicc of {b:branch, a:bool, label:Label.label, nop:bool}
+ asm: ``b\t